Example #1
    def __getitem__(self, key):
        # Just integer indices (no slices) for now
        if not isinstance(key, tuple):
            key = (key,)
        key = tuple([operator.index(i) for i in key])
        if len(key) > len(self._dshape) - 1:
            raise IndexError('Cannot have more indices than dimensions')

        # Apply each index in turn
        ds = self._dshape
        c_strides = ds.c_strides
        ptr = self._ptr
        for i, idx in enumerate(key):
            dim_size = operator.index(ds[i])
            # Implement Python's negative indexing
            if idx >= 0:
                if idx >= dim_size:
                raise IndexError(('Index %d is out of range ' +
                                  'in dimension sized %d') % (idx, dim_size))
            else:
                if idx < -dim_size:
                raise IndexError(('Index %d is out of range ' +
                                  'in dimension sized %d') % (idx, dim_size))
                idx += dim_size
            ptr = ptr + idx * c_strides[i]
        # Create the data shape of the result
        ds = ds.subarray(len(key))
        return MemBufDataDescriptor(ptr, self.ptr_owner, ds, self._writable)
Example #2
File: util.py Project: talumbau/datashape
def cat_dshapes(dslist):
    """
    Concatenates a list of dshapes together along
    the first axis. Raises an error if there is
    a mismatch along another axis or the measures
    are different.

    Requires that the leading dimension be a known
    size for all data shapes.
    TODO: Relax this restriction to support
          streaming dimensions.
    """
    if len(dslist) == 0:
        raise ValueError('Cannot concatenate an empty list of dshapes')
    elif len(dslist) == 1:
        return dslist[0]

    outer_dim_size = operator.index(dslist[0][0])
    inner_ds = dslist[0][1:]
    for ds in dslist[1:]:
        outer_dim_size += operator.index(ds[0])
        if ds[1:] != inner_ds:
            raise ValueError(('The datashapes to concatenate must'
                              ' all match after'
                              ' the first dimension (%s vs %s)') %
                              (inner_ds, ds[1:]))
    return coretypes.DataShape(*[coretypes.Fixed(outer_dim_size)] + list(inner_ds))
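
A hypothetical usage sketch (not part of the original file), assuming the datashape package is installed and that a parsed Fixed dimension supports __index__, as the code above requires:

from datashape import dshape

# Concatenating along the first axis adds the leading Fixed dimensions;
# any mismatch after the first dimension raises ValueError.
print(cat_dshapes([dshape('3 * int32'), dshape('2 * int32')]))
# expected: 5 * int32
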
Example #3
 def test_basic(self):
     self.o.ind = -2
     self.n.ind = 2
     import operator
     assert operator.index(self.o) == -2
     assert operator.index(self.n) == 2
     raises(TypeError, operator.index, self.o_no_index) 
     raises(TypeError, operator.index, self.n_no_index) 
Example #4
def check_shape(args, current_shape=None):
    """Imitate numpy.matrix handling of shape arguments"""
    if len(args) == 0:
        raise TypeError("function missing 1 required positional argument: "
                        "'shape'")
    elif len(args) == 1:
        try:
            shape_iter = iter(args[0])
        except TypeError:
            new_shape = (operator.index(args[0]), )
        else:
            new_shape = tuple(operator.index(arg) for arg in shape_iter)
    else:
        new_shape = tuple(operator.index(arg) for arg in args)

    if current_shape is None:
        if len(new_shape) != 2:
            raise ValueError('shape must be a 2-tuple of positive integers')
        elif new_shape[0] < 0 or new_shape[1] < 0:
            raise ValueError("'shape' elements cannot be negative")

    else:
        # Check the current size only if needed
        current_size = np.prod(current_shape, dtype=int)

        # Check for negatives
        negative_indexes = [i for i, x in enumerate(new_shape) if x < 0]
        if len(negative_indexes) == 0:
            new_size = np.prod(new_shape, dtype=int)
            if new_size != current_size:
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 .format(new_size, new_shape))
        elif len(negative_indexes) == 1:
            skip = negative_indexes[0]
            specified = np.prod(new_shape[0:skip] + new_shape[skip+1:])
            unspecified, remainder = divmod(current_size, specified)
            if remainder != 0:
                err_shape = tuple('newshape' if x < 0 else x for x in new_shape)
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 ''.format(current_size, err_shape))
            new_shape = new_shape[0:skip] + (unspecified,) + new_shape[skip+1:]
        else:
            raise ValueError('can only specify one unknown dimension')

        # Add and remove ones like numpy.matrix.reshape
        if len(new_shape) != 2:
            new_shape = tuple(arg for arg in new_shape if arg != 1)

            if len(new_shape) == 0:
                new_shape = (1, 1)
            elif len(new_shape) == 1:
                new_shape = (1, new_shape[0])

    if len(new_shape) > 2:
        raise ValueError('shape too large to be a matrix')

    return new_shape
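
A short usage sketch of the helper above (not part of the original module, and assuming its numpy and operator imports), showing the accepted argument forms and -1 inference against a current shape:

# Shape passed as one iterable argument, as separate arguments, and with
# one unknown (-1) dimension inferred from the current size (2 * 3 = 6).
print(check_shape(((2, 3),)))            # (2, 3)
print(check_shape((2, 3)))               # (2, 3)
print(check_shape(((-1, 2),), (2, 3)))   # (3, 2)
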
Example #5
 def _get_item_ptr(self, idx):
     if len(idx) != self.nindex:
         raise IndexError('Incorrect number of indices (got %d, require %d)' %
                          (len(idx), self.nindex))
     idx = tuple(operator.index(i) for i in idx)
     if self.nindex > 0:
         c_strides = self._mbdd.dshape.c_strides[:self.nindex]
         offset = sum(stride * i for stride, i in zip(c_strides, idx))
         return self._mbdd.ptr + offset
     else:
         return self._mbdd.ptr
Example #6
 def __new__(cls, width, height):
     try:
         width = index(width)
     except TypeError:
         raise TypeError("width must be of integral value or provide "
                         "an __index__ method")
     if width <= 0:
         raise ValueError("width must be greater than 0")
     try:
         height = index(height)
     except TypeError:
         raise TypeError("height must be of integral value or provide "
                         "an __index__ method")
     if height <= 0:
         raise ValueError("height must be greater than 0")
     return super(ImageSize, cls).__new__(cls, width, height)
Example #7
def binom(n, k):
    """
    Binomial coefficient (n over k)
    """
    n = operator.index(n)
    k = operator.index(k)
    if not 0 <= k <= n:
        return 0
    m = n + 1
    num = 1
    den = 1
    for j in range(1, min(k, n - k) + 1):
        num *= m - j
        den *= j
    return num // den
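
Because both arguments go through operator.index, any true integer type works while floats are rejected; a quick check of the function above:

import numpy as np

print(binom(5, 2))                      # 10
print(binom(3, 5))                      # 0, since k is outside 0..n
print(binom(np.int64(6), np.int64(3)))  # 20, numpy ints pass __index__
print(binom(6, 4) == binom(6, 2))       # True, by symmetry
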
Example #8
 def index(self, value, start=None, stop=None):
     start = index(start) if start is not None else 0
     if stop is not None:
         stop = index(stop)
     if not (start == 0 and stop is None):
         l = self[start:stop]
     else:
         l = self
     idx = start
     while l is not nil:
         if l.car == value:
             return idx
         l = l.cdr
         idx += 1
     raise ValueError("'%s' not in list" % value)
Example #9
File: fractions.py Project: alexji/pynbody
    def __new__(cls, numerator=0, denominator=1):
        """Constructs a Fraction.

        Takes a string like '3/2' or '1.5', another Fraction, or a
        numerator/denominator pair.

        """
        self = super(Fraction, cls).__new__(cls)

        if type(numerator) not in (int, long) and denominator == 1:
            if isinstance(numerator, basestring):
                # Handle construction from strings.
                input = numerator
                m = _RATIONAL_FORMAT.match(input)
                if m is None:
                    raise ValueError(
                        'Invalid literal for Fraction: %r' % input)
                numerator = m.group('num')
                decimal = m.group('decimal')
                if decimal:
                    # The literal is a decimal number.
                    numerator = int(numerator + decimal)
                    denominator = 10 ** len(decimal)
                else:
                    # The literal is an integer or fraction.
                    numerator = int(numerator)
                    # Default denominator to 1.
                    denominator = int(m.group('denom') or 1)

                if m.group('sign') == '-':
                    numerator = -numerator

            elif isinstance(numerator, Rational):
                # Handle copies from other rationals. Integrals get
                # caught here too, but it doesn't matter because
                # denominator is already 1.
                other_rational = numerator
                numerator = other_rational.numerator
                denominator = other_rational.denominator

        if denominator == 0:
            raise ZeroDivisionError('Fraction(%s, 0)' % numerator)
        numerator = operator.index(numerator)
        denominator = operator.index(denominator)
        g = gcd(numerator, denominator)
        self._numerator = numerator // g
        self._denominator = denominator // g
        return self
Example #10
File: test_slice.py Project: 10sr/cpython
def slice_indices(slice, length):
    """
    Reference implementation for the slice.indices method.

    """
    # Compute step and length as integers.
    length = operator.index(length)
    step = 1 if slice.step is None else evaluate_slice_index(slice.step)

    # Raise ValueError for negative length or zero step.
    if length < 0:
        raise ValueError("length should not be negative")
    if step == 0:
        raise ValueError("slice step cannot be zero")

    # Find lower and upper bounds for start and stop.
    lower = -1 if step < 0 else 0
    upper = length - 1 if step < 0 else length

    # Compute start.
    if slice.start is None:
        start = upper if step < 0 else lower
    else:
        start = evaluate_slice_index(slice.start)
        start = max(start + length, lower) if start < 0 else min(start, upper)

    # Compute stop.
    if slice.stop is None:
        stop = lower if step < 0 else upper
    else:
        stop = evaluate_slice_index(slice.stop)
        stop = max(stop + length, lower) if stop < 0 else min(stop, upper)

    return start, stop, step
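
The reference implementation relies on an evaluate_slice_index helper defined elsewhere in the same test module; a minimal stand-in (an assumption, not the CPython original) is enough to exercise it against the built-in slice.indices:

import operator

def evaluate_slice_index(arg):
    # Stand-in for CPython's test helper: slice components must support
    # __index__, which operator.index enforces.
    return operator.index(arg)

s = slice(None, None, -2)
assert slice_indices(s, 10) == s.indices(10) == (9, -1, -2)
s = slice(2, -1, 3)
assert slice_indices(s, 10) == s.indices(10) == (2, 9, 3)
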
Example #11
def to_int(self, v):
    try:
        int_pack(index(v))
    except (struct_error, TypeError):
        raise TypeError('32-bit integer expected')

    return int(v)
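
The snippet assumes module-level int_pack and struct_error names; a plausible setup (an assumption, not the original source) packs a big-endian 32-bit signed integer, so both the range check and the __index__ requirement come from struct:

from struct import Struct, error as struct_error
from operator import index

int_pack = Struct('>i').pack     # assumed definition: 32-bit signed, big-endian

int_pack(index(2**31 - 1))       # fine: fits in 32 bits
# int_pack(index(2**31))         # struct.error: too large for 32 bits
# index(3.0)                     # TypeError: floats don't provide __index__
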
Example #12
def _check_rank(rank):
    """Check rank parameter and deal with deprecation."""
    err_msg = ('rank must be None, dict, "full", or int, '
               'got %s (type %s)' % (rank, type(rank)))
    if isinstance(rank, str):
        # XXX we can use rank='' to deprecate to get to None eventually:
        # if rank == '':
        #     warn('The rank parameter default in 0.18 of "full" will change '
        #          'to None in 0.19, set it explicitly to avoid this warning',
        #          DeprecationWarning)
        #     rank = 'full'
        if rank not in ['full', 'info']:
            raise ValueError('rank, if str, must be "full" or "info", '
                             'got %s' % (rank,))
    elif isinstance(rank, bool):
        raise TypeError(err_msg)
    elif rank is not None and not isinstance(rank, dict):
        try:
            rank = int(operator.index(rank))
        except TypeError:
            raise TypeError(err_msg)
        else:
            warn('rank as int is deprecated and will be removed in 0.19. '
                 'use rank=dict(meg=...) instead.', DeprecationWarning)
            rank = dict(meg=rank)
    return rank
Example #13
 def test_index(self):
     import operator
     assert operator.index(42) == 42
     assert operator.__index__(42) == 42
     raises(TypeError, operator.index, "abc")
     exc = raises(TypeError, operator.index, "abc")
     assert str(exc.value) == "'str' object cannot be interpreted as an integer"
Example #14
 def __init__(self, text_node):
     self.index = index(text_node)
     self.msg_dict = {'type': 'setTextNode', 
                      'childTag': text_node.tag, 
                      'parentId': text_node.parentNode.id, 
                      'index': self.index,
                      'text': text_node.text }
Example #15
 async def receive_some(self, max_bytes):
     async with self._receive_conflict_detector:
         # Argument validation
         max_bytes = operator.index(max_bytes)
         if max_bytes < 1:
             raise ValueError("max_bytes must be >= 1")
         # State validation
         if self._receiver_closed:
             raise ClosedStreamError
         # Wake wait_send_all_might_not_block and wait for data
         self._receiver_waiting = True
         self._something_happened()
         try:
             await self._wait_for(lambda: self._data or self._sender_closed)
         finally:
             self._receiver_waiting = False
         # Get data, possibly waking send_all
         if self._data:
             got = self._data[:max_bytes]
             del self._data[:max_bytes]
             self._something_happened()
             return got
         else:
             assert self._sender_closed
             return b""
Example #16
    def test_index_returns_int_subclass(self):
        class BadInt:
            def __index__(self):
                return True

        class BadInt2(int):
            def __index__(self):
                return True

        bad_int = BadInt()
        n = operator.index(bad_int)
        self.assertEqual(n, 1)

        bad_int = BadInt2()
        n = operator.index(bad_int)
        self.assertEqual(n, 0)
Example #17
    def __getitem__(self, key):
        """ This function reports the record of case number <key>.
        For example: firstRecord = SavReader(savFileName)[0].
        The <key> argument may also be a slice, for example:
        firstfiveRecords = SavReader(savFileName)[:5].
        You can also do stuff like (numpy required!):
        SavReader(savFileName)[1:5, 1]"""

        is_slice = isinstance(key, slice)
        is_array_slice = key is Ellipsis or isinstance(key, tuple)

        if is_slice:
            start, stop, step = key.indices(self.nCases)
        elif is_array_slice:
            return self._get_array_slice(key, self.nCases, len(self.header))
        else:
            key = operator.index(key)
            start = key + self.nCases if key < 0 else key
            if not 0 <= start < self.nCases:
                raise IndexError("Index out of bounds")
            stop = start + 1
            step = 1

        records = self._items(start, stop, step)
        if is_slice:
            return list(records)
        return next(records)
Example #18
 def test_proxy_index(self):
     class C:
         def __index__(self):
             return 10
     o = C()
     p = weakref.proxy(o)
     self.assertEqual(operator.index(p), 10)
Example #19
def interpret(func, env, args, storage=None, **kwds):
    assert len(args) == len(func.args)

    # Make a copy, since we're going to mutate our IR!
    func, _ = copy_function(func)

    # If it's a BLZ output, we want an interpreter that streams
    # the processing through in chunks
    if storage is not None:
        if len(func.type.restype.shape) == 0:
            raise TypeError('Require an array, not a scalar, for outputting to BLZ')
        env['stream-outer'] = True
        result_ndim = env['result-ndim'] = len(func.type.restype.shape)
    else:
        # Convert any persistent inputs to memory
        # TODO: should stream the computation in this case
        for i, arg in enumerate(args):
            if isinstance(arg._data, BLZDataDescriptor):
                args[i] = arg[:]

    # Update environment with dynd type information
    dynd_types = dict((arg, get_dynd_type(array))
                          for arg, array in zip(func.args, args)
                              if isinstance(array._data, DyNDDataDescriptor))
    env['dynd-types'] = dynd_types

    # Lift ckernels
    func, env = run_pipeline(func, env, run_time_passes)

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        res_shape, res_dt = datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
        chunk_size = min(max(1, (1024*1024) // row_size), dim_size)
        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        dst_dd = BLZDataDescriptor(blz.zeros((0,)+res_shape[1:], res_dt,
                                             rootdir=storage.path))
        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)
            # Evaluate the chunk
            args_chunk = [arg[chunk_start:chunk_start+chunk_size]
                            if len(arg.dshape.shape) == result_ndim
                            else arg for arg in args]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result._data.dynd_arr()
            dst_dd.append(chunk)
        return blaze.Array(dst_dd)
Example #20
File: row.py Project: SaraOgaz/astropy
    def __init__(self, table, index):
        self._table = table
        self._index = operator.index(index)

        n = len(table)
        if index < -n or index >= n:
            raise IndexError('index {0} out of range for table with length {1}'
                             .format(index, len(table)))
Example #21
File: test_index.py Project: ztane/jython3
    def test_index_returns_int_subclass(self):
        class BadInt:
            def __index__(self):
                return True

        class BadInt2(int):
            def __index__(self):
                return True

        bad_int = BadInt()
        with self.assertWarns(DeprecationWarning):
            n = operator.index(bad_int)
        self.assertEqual(n, 1)

        bad_int = BadInt2()
        n = operator.index(bad_int)
        self.assertEqual(n, 0)
Example #22
 def _get_abs_string_index(self, idx):
     """Get the absolute index for the list of modules"""
     idx = operator.index(idx)
     if not (-len(self) <= idx < len(self)):
         raise IndexError('index {} is out of range'.format(idx))
     if idx < 0:
         idx += len(self)
     return str(idx)
Example #23
 def _get_item_by_idx(self, iterator, idx):
     """Get the idx-th item of the iterator"""
     size = len(self)
     idx = operator.index(idx)
     if not -size <= idx < size:
         raise IndexError('index {} is out of range'.format(idx))
     idx %= size
     return next(islice(iterator, idx, None))
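
The same pattern detached from its container (a standalone sketch, not the original class): normalize with operator.index, wrap a negative index once, then consume the iterator with islice:

import operator
from itertools import islice

def nth(iterable, idx, size):
    # Bounds-checked idx-th element of an iterable of known size.
    idx = operator.index(idx)
    if not -size <= idx < size:
        raise IndexError('index {} is out of range'.format(idx))
    idx %= size
    return next(islice(iter(iterable), idx, None))

print(nth("abcde", -2, 5))   # 'd'
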
Example #24
    def __init__(self, i):
        # Use operator.index, so Python integers, numpy int scalars, etc work
        i = operator.index(i)

        if i < 0:
            raise ValueError('Fixed dimensions must be non-negative')

        self.val = i
Example #25
 def __getitem__(self, key):
     if not isinstance(key, slice):
         child_sbvalue = (self.__sbvalue.GetValueForExpressionPath("[%i]" % operator.index(key)))
         if child_sbvalue and child_sbvalue.IsValid():
             return Value(child_sbvalue)
         raise IndexError("Index '%d' is out of range" % key)
     else:
         return [self[i] for i in xrange(*key.indices(len(self)))]
Example #26
 def bind_func(estruct, dst_dd, src_dd_list):
     for i, (ds, dd) in enumerate(
                     izip(self.dshapes, src_dd_list + [dst_dd])):
         shape = [operator.index(dim)
                         for dim in dd.dshape[-len(ds):-1]]
         cshape = getattr(estruct, 'operand_%d' % i)
         for j, dim_size in enumerate(shape):
             cshape[j] = dim_size
Example #27
 def __eq__(self, item):
     """A Users is equal to a User if the ids match"""
     if isinstance(item, User):
         return self.id == item.id
     try:
         return self.id == index(item)
     except TypeError:
         return NotImplemented
Example #28
def _ensure_int(x, name='unknown', must_be='an int'):
    """Ensure a variable is an integer."""
    # This is preferred over numbers.Integral, see:
    # https://github.com/scipy/scipy/pull/7351#issuecomment-299713159
    try:
        x = int(operator.index(x))
    except TypeError:
        raise TypeError('%s must be %s, got %s' % (name, must_be, type(x)))
    return x
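
A quick check of the helper above: operator.index accepts true integer types (including numpy scalars) but rejects floats, which a bare int() would silently truncate:

import numpy as np

print(_ensure_int(np.int32(5), 'n_channels'))   # 5
try:
    _ensure_int(5.0, 'n_channels')
except TypeError as err:
    print(err)   # n_channels must be an int, got <class 'float'>
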
Example #29
 def __getitem__(self, key):
     # Just integer indices (no slices) for now
     if not isinstance(key, tuple):
         key = (key,)
     key = tuple([operator.index(i) for i in key])
     blzarr = self.blzarr
     # The returned arrays are temporary buffers,
     # so must be flagged as readonly.
     return DyNDDataDescriptor(nd.array(blzarr[key], access='readonly'))
Example #30
 def __init__(self, mbdd):
     if len(mbdd.dshape) <= 1:
         raise IndexError('Need at least one dimension for iteration')
     self._outer_stride = mbdd.dshape.c_strides[0]
     self._mbdd = mbdd
     self._dshape = mbdd.dshape.subarray(1)
     self._ptr = mbdd.ptr
     self._end = (self._ptr +
                     self._outer_stride * operator.index(mbdd.dshape[0]))
Example #31
def sanitize_smiles_df(
    df: pd.DataFrame,
    column_levels: SupportsIndex = 2,
    column_padding: Hashable = None,
) -> pd.DataFrame:
    """Sanitize the passed dataframe, canonicalizing the SMILES in its index, converting the columns into a multiIndex and removing duplicate entries.

    Examples
    --------
    .. code-block:: python

        >>> import pandas as pd
        >>> from nanoCAT.recipes import sanitize_smiles_df

        >>> df: pd.DataFrame = ...
        >>> print(df)
                 a
        smiles
        CCCO[H]  1
        CCO[H]   2
        CO[H]    3

        >>> sanitize_smiles_df(df)
                 a
               NaN
        smiles
        CCCO     1
        CCO      2
        CO       3

    Parameters
    ----------
    df : :class:`pd.DataFrame <pandas.DataFrame>`
        The dataframe in question.
        The dataframes' index should consist of smiles strings.
    column_levels : :class:`int`
        The number of multiindex column levels that should be in the to-be returned dataframe.
    column_padding : :class:`~collections.abc.Hashable`
        The object used as padding for the multiindex levels (where appropriate).

    Returns
    -------
    :class:`pd.DataFrame <pandas.DataFrame>`
        The newly sanitized dataframe.
        Returns either the initially passed dataframe or a copy thereof.

    """  # noqa: E501
    # Sanitize `arguments`
    column_levels = operator.index(column_levels)
    if column_levels < 1:
        raise ValueError("`column_levels` must be larger than or equal to 1")
    elif isinstance(df.columns, pd.MultiIndex) and len(df.columns.levels) > column_levels:
        raise ValueError("`column_levels` must be larger than or equal to number "
                         "of MultiIndex levels in `df`")
    elif not isinstance(column_padding, Hashable):
        raise TypeError("`column_padding` expected a hashable object")

    # Sanitize the index
    index = pd.Index(
        [_canonicalize_smiles(i) for i in df.index],
        dtype=df.index.dtype, name=df.index.name,
    )

    # Create or pad a MultiIndex
    padding = (column_levels - 1) * (column_padding,)
    if not isinstance(df.columns, pd.MultiIndex):
        columns = pd.MultiIndex.from_tuples(
            [(i, *padding) for i in df.columns], names=(df.columns.name, *padding)
        )
    elif len(df.columns.levels) < column_levels:
        columns = pd.MultiIndex.from_tuples(
            [(*j, *padding) for j in df.columns], names=(*df.columns.names, *padding)
        )
    else:
        columns = df.columns.copy()

    mask = ~df.index.duplicated(keep='first') & (df.index != None)
    ret = df[mask]
    ret.index = index[mask]
    ret.columns = columns
    return ret
Example #32
def _reg_pinv(x, reg=0, rank='full', rcond=1e-15):
    """Compute a regularized pseudoinverse of Hermitian matrices.

    Regularization is performed by adding a constant value to each diagonal
    element of the matrix before inversion. This is known as "diagonal
    loading". The loading factor is computed as ``reg * np.trace(x) / len(x)``.

    The pseudo-inverse is computed through SVD decomposition and inverting the
    singular values. When the matrix is rank deficient, some singular values
    will be close to zero and will not be used during the inversion. The number
    of singular values to use can either be manually specified or automatically
    estimated.

    Parameters
    ----------
    x : ndarray, shape (..., n, n)
        Square, Hermitian matrices to invert.
    reg : float
        Regularization parameter. Defaults to 0.
    rank : int | None | 'full'
        This controls the effective rank of the covariance matrix when
        computing the inverse. The rank can be set explicitly by specifying an
        integer value. If ``None``, the rank will be automatically estimated.
        Since applying regularization will always make the covariance matrix
        full rank, the rank is estimated before regularization in this case. If
        'full', the rank will be estimated after regularization and hence
        will mean using the full rank, unless ``reg=0`` is used.
        Defaults to 'full'.
    rcond : float | 'auto'
        Cutoff for detecting small singular values when attempting to estimate
        the rank of the matrix (``rank=None``). Singular values smaller than
        the cutoff are set to zero. When set to 'auto', a cutoff based on
        floating point precision will be used. Defaults to 1e-15.

    Returns
    -------
    x_inv : ndarray, shape (..., n, n)
        The inverted matrix.
    loading_factor : float
        Value added to the diagonal of the matrix during regularization.
    rank : int
        If ``rank`` was set to an integer value, this value is returned,
        else the estimated rank of the matrix, before regularization, is
        returned.
    """
    from ..rank import _estimate_rank_from_s
    if rank is not None and rank != 'full':
        rank = int(operator.index(rank))
    if x.ndim < 2 or x.shape[-2] != x.shape[-1]:
        raise ValueError('Input matrix must be square.')
    if not np.allclose(x, x.conj().swapaxes(-2, -1)):
        raise ValueError('Input matrix must be Hermitian (symmetric)')
    assert x.ndim >= 2 and x.shape[-2] == x.shape[-1]
    n = x.shape[-1]

    # Decompose the matrix, not necessarily positive semidefinite
    from mne.fixes import svd
    U, s, Vh = svd(x, hermitian=True)

    # Estimate the rank before regularization
    tol = 'auto' if rcond == 'auto' else rcond * s[..., :1]
    rank_before = _estimate_rank_from_s(s, tol)

    # Decompose the matrix again after regularization
    loading_factor = reg * np.mean(s, axis=-1)
    if reg:
        U, s, Vh = svd(x +
                       loading_factor[..., np.newaxis, np.newaxis] * np.eye(n),
                       hermitian=True)

    # Estimate the rank after regularization
    tol = 'auto' if rcond == 'auto' else rcond * s[..., :1]
    rank_after = _estimate_rank_from_s(s, tol)

    # Warn the user if both all parameters were kept at their defaults and the
    # matrix is rank deficient.
    if (rank_after < n).any() and reg == 0 and \
            rank == 'full' and rcond == 1e-15:
        warn('Covariance matrix is rank-deficient and no regularization is '
             'done.')
    elif isinstance(rank, int) and rank > n:
        raise ValueError('Invalid value for the rank parameter (%d) given '
                         'the shape of the input matrix (%d x %d).' %
                         (rank, x.shape[0], x.shape[1]))

    # Pick the requested number of singular values
    mask = np.arange(s.shape[-1]).reshape((1, ) * (x.ndim - 2) + (-1, ))
    if rank is None:
        cmp = ret = rank_before
    elif rank == 'full':
        cmp = rank_after
        ret = rank_before
    else:
        cmp = ret = rank
    mask = mask < np.asarray(cmp)[..., np.newaxis]
    mask &= s > 0

    # Invert only non-zero singular values
    s_inv = np.zeros(s.shape)
    s_inv[mask] = 1. / s[mask]

    # Compute the pseudo inverse
    x_inv = np.matmul(U * s_inv[..., np.newaxis, :], Vh)

    return x_inv, loading_factor, ret
Example #33
def make_lsq_spline(x, y, t, k=3, w=None, axis=0, check_finite=True):
    r"""Compute the (coefficients of) an LSQ B-spline.

    The result is a linear combination

    .. math::

            S(x) = \sum_j c_j B_j(x; t)

    of the B-spline basis elements, :math:`B_j(x; t)`, which minimizes

    .. math::

        \sum_{j} \left( w_j \times (S(x_j) - y_j) \right)^2

    Parameters
    ----------
    x : array_like, shape (m,)
        Abscissas.
    y : array_like, shape (m, ...)
        Ordinates.
    t : array_like, shape (n + k + 1,).
        Knots.
        Knots and data points must satisfy Schoenberg-Whitney conditions.
    k : int, optional
        B-spline degree. Default is cubic, k=3.
    w : array_like, shape (m,), optional
        Weights for spline fitting. Must be positive. If ``None``,
        then weights are all equal.
        Default is ``None``.
    axis : int, optional
        Interpolation axis. Default is zero.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree `k` with knots `t`.

    Notes
    -----

    The number of data points must be larger than the spline degree `k`.

    Knots `t` must satisfy the Schoenberg-Whitney conditions,
    i.e., there must be a subset of data points ``x[j]`` such that
    ``t[j] < x[j] < t[j+k+1]``, for ``j=0, 1,...,n-k-2``.

    Examples
    --------
    Generate some noisy data:

    >>> x = np.linspace(-3, 3, 50)
    >>> y = np.exp(-x**2) + 0.1 * np.random.randn(50)

    Now fit a smoothing cubic spline with a pre-defined internal knots.
    Here we make the knot vector (k+1)-regular by adding boundary knots:

    >>> from scipy.interpolate import make_lsq_spline, BSpline
    >>> t = [-1, 0, 1]
    >>> k = 3
    >>> t = np.r_[(x[0],)*(k+1),
    ...           t,
    ...           (x[-1],)*(k+1)]
    >>> spl = make_lsq_spline(x, y, t, k)

    For comparison, we also construct an interpolating spline for the same
    set of data:

    >>> from scipy.interpolate import make_interp_spline
    >>> spl_i = make_interp_spline(x, y)

    Plot both:

    >>> import matplotlib.pyplot as plt
    >>> xs = np.linspace(-3, 3, 100)
    >>> plt.plot(x, y, 'ro', ms=5)
    >>> plt.plot(xs, spl(xs), 'g-', lw=3, label='LSQ spline')
    >>> plt.plot(xs, spl_i(xs), 'b-', lw=3, alpha=0.7, label='interp spline')
    >>> plt.legend(loc='best')
    >>> plt.show()

    **NaN handling**: If the input arrays contain ``nan`` values, the result is
    not useful since the underlying spline fitting routines cannot deal with
    ``nan``. A workaround is to use zero weights for not-a-number data points:

    >>> y[8] = np.nan
    >>> w = np.isnan(y)
    >>> y[w] = 0.
    >>> tck = make_lsq_spline(x, y, t, w=~w)

    Notice the need to replace a ``nan`` by a numerical value (precise value
    does not matter as long as the corresponding weight is zero.)

    See Also
    --------
    BSpline : base class representing the B-spline objects
    make_interp_spline : a similar factory function for interpolating splines
    LSQUnivariateSpline : a FITPACK-based spline fitting routine
    splrep : a FITPACK-based fitting routine

    """
    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    t = _as_float_array(t, check_finite)
    if w is not None:
        w = _as_float_array(w, check_finite)
    else:
        w = np.ones_like(x)
    k = operator.index(k)

    axis = normalize_axis_index(axis, y.ndim)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] - x[:-1] <= 0):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if x.shape[0] < k+1:
        raise ValueError("Need more x points.")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] - t[:-1] < 0):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('x & y are incompatible.')
    if k > 0 and np.any((x < t[k]) | (x > t[-k])):
        raise ValueError('Out of bounds w/ x = %s.' % x)
    if x.size != w.size:
        raise ValueError('Incompatible weights.')

    # number of coefficients
    n = t.size - k - 1

    # construct A.T @ A and rhs with A the collocation matrix, and
    # rhs = A.T @ y for solving the LSQ problem  ``A.T @ A @ c = A.T @ y``
    lower = True
    extradim = prod(y.shape[1:])
    ab = np.zeros((k+1, n), dtype=np.float_, order='F')
    rhs = np.zeros((n, extradim), dtype=y.dtype, order='F')
    _bspl._norm_eq_lsq(x, t, k,
                       y.reshape(-1, extradim),
                       w,
                       ab, rhs)
    rhs = rhs.reshape((n,) + y.shape[1:])

    # have observation matrix & rhs, can solve the LSQ problem
    cho_decomp = cholesky_banded(ab, overwrite_ab=True, lower=lower,
                                 check_finite=check_finite)
    c = cho_solve_banded((cho_decomp, lower), rhs, overwrite_b=True,
                         check_finite=check_finite)

    c = np.ascontiguousarray(c)
    return BSpline.construct_fast(t, c, k, axis=axis)
Example #34
 def nchan(self, nchan):
     self['OBSNCHAN'] = operator.index(nchan)
Example #35
 def _check_max_bytes(self, max_bytes):
     if max_bytes is None:
         return
     max_bytes = operator.index(max_bytes)
     if max_bytes < 1:
         raise ValueError("max_bytes must be >= 1")
Example #36
 def __setitem__(self, idx, module):
     idx = operator.index(idx)
     return setattr(self, str(idx), module)
Example #37
 def __setitem__(self, idx, param):
     idx = operator.index(idx)
     return self.register_parameter(str(idx), param)
Example #38
def eye(N, M=None, k=0, dtype=float, order='C', *, like=None):
    """
    Return a 2-D array with ones on the diagonal and zeros elsewhere.

    Parameters
    ----------
    N : int
      Number of rows in the output.
    M : int, optional
      Number of columns in the output. If None, defaults to `N`.
    k : int, optional
      Index of the diagonal: 0 (the default) refers to the main diagonal,
      a positive value refers to an upper diagonal, and a negative value
      to a lower diagonal.
    dtype : data-type, optional
      Data-type of the returned array.
    order : {'C', 'F'}, optional
        Whether the output should be stored in row-major (C-style) or
        column-major (Fortran-style) order in memory.

        .. versionadded:: 1.14.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    I : ndarray of shape (N,M)
      An array where all elements are equal to zero, except for the `k`-th
      diagonal, whose values are equal to one.

    See Also
    --------
    identity : (almost) equivalent function
    diag : diagonal 2-D array from a 1-D array specified by the user.

    Examples
    --------
    >>> np.eye(2, dtype=int)
    array([[1, 0],
           [0, 1]])
    >>> np.eye(3, k=1)
    array([[0.,  1.,  0.],
           [0.,  0.,  1.],
           [0.,  0.,  0.]])

    """
    if like is not None:
        return _eye_with_like(N, M=M, k=k, dtype=dtype, order=order, like=like)
    if M is None:
        M = N
    m = zeros((N, M), dtype=dtype, order=order)
    if k >= M:
        return m
    # Ensure M and k are integers, so we don't get any surprise casting
    # results in the expressions `M-k` and `M+1` used below.  This avoids
    # a problem with inputs with type (for example) np.uint64.
    M = operator.index(M)
    k = operator.index(k)
    if k >= 0:
        i = k
    else:
        i = (-k) * M
    m[:M - k].flat[i::M + 1] = 1
    return m
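
The only non-obvious step above is the flat-index assignment: in a C-ordered (N, M) array, advancing the flat index by M + 1 moves one row down and one column right, so a single strided slice paints the whole diagonal. A small self-check against the function's documented behaviour:

import numpy as np

N, M, k = 5, 3, 1
m = np.zeros((N, M))
i = k if k >= 0 else (-k) * M     # flat offset of the diagonal's first 1
m[:M - k].flat[i::M + 1] = 1      # [:M - k] stops positive diagonals early
assert (m == np.eye(N, M, k=k)).all()
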
Example #39
def as_int(n, strict=True):
    """
    Convert the argument to a builtin integer.

    The return value is guaranteed to be equal to the input. ValueError is
    raised if the input has a non-integral value. When ``strict`` is True, this
    uses `__index__ <https://docs.python.org/3/reference/datamodel.html#object.__index__>`_
    and when it is False it uses ``int``.


    Examples
    ========

    >>> from sympy.core.compatibility import as_int
    >>> from sympy import sqrt, S

    The function is primarily concerned with sanitizing input for
    functions that need to work with builtin integers, so anything that
    is unambiguously an integer should be returned as an int:

    >>> as_int(S(3))
    3

    Floats, being of limited precision, are not assumed to be exact and
    will raise an error unless the ``strict`` flag is False. This
    precision issue becomes apparent for large floating point numbers:

    >>> big = 1e23
    >>> type(big) is float
    True
    >>> big == int(big)
    True
    >>> as_int(big)
    Traceback (most recent call last):
    ...
    ValueError: ... is not an integer
    >>> as_int(big, strict=False)
    99999999999999991611392

    Input that might be a complex representation of an integer value is
    also rejected by default:

    >>> one = sqrt(3 + 2*sqrt(2)) - sqrt(2)
    >>> int(one) == 1
    True
    >>> as_int(one)
    Traceback (most recent call last):
    ...
    ValueError: ... is not an integer
    """
    if strict:
        try:
            if type(n) is bool:
                raise TypeError
            return operator.index(n)
        except TypeError:
            raise ValueError('%s is not an integer' % (n,))
    else:
        try:
            result = int(n)
        except TypeError:
            raise ValueError('%s is not an integer' % (n,))
        if n != result:
            raise ValueError('%s is not an integer' % (n,))
        return result
Example #40
 def __getitem__(self, key):
     if isinstance(key, slice):
         cls = type(self)
         return cls(self._components[key])
     index = operator.index(key)
     return self._components[index]
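
A self-contained re-creation of the pattern above (the Seq class here is hypothetical): slices reproduce the container's own type, while every other key must pass operator.index, so float indices fail loudly:

import operator

class Seq:
    def __init__(self, components):
        self._components = list(components)

    def __getitem__(self, key):
        if isinstance(key, slice):
            cls = type(self)
            return cls(self._components[key])
        index = operator.index(key)
        return self._components[index]

s = Seq(range(5))
print(s[2])                  # 2
print(s[1:3]._components)    # [1, 2]
try:
    s[1.0]
except TypeError as err:
    print(err)               # 'float' object cannot be interpreted as an integer
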
Example #41
def random_bcoo(key,
                shape,
                *,
                dtype=jnp.float_,
                indices_dtype=jnp.int_,
                nse=0.2,
                n_batch=0,
                n_dense=0,
                unique_indices=True,
                sorted_indices=False,
                generator=random.uniform,
                **kwds):
    """Generate a random BCOO matrix.

    Args:
      key : random.PRNGKey to be passed to ``generator`` function.
      shape : tuple specifying the shape of the array to be generated.
      dtype : dtype of the array to be generated.
      indices_dtype: dtype of the BCOO indices.
      nse : number of specified elements in the matrix, or if 0 < nse < 1, a
        fraction of sparse dimensions to be specified (default: 0.2).
      n_batch : number of batch dimensions. must satisfy ``n_batch >= 0`` and
        ``n_batch + n_dense <= len(shape)``.
      n_dense : number of dense dimensions. must satisfy ``n_dense >= 0`` and
        ``n_batch + n_dense <= len(shape)``.
      unique_indices : boolean specifying whether indices should be unique
        (default: True).
      sorted_indices : boolean specifying whether indices should be row-sorted in
        lexicographical order (default: False).
      generator : function for generating random values accepting a key, shape,
        and dtype. It defaults to :func:`jax.random.uniform`, and may be any
        function with a similar signature.
      **kwds : additional keyword arguments to pass to ``generator``.

    Returns:
      arr : a sparse.BCOO array with the specified properties.
    """
    shape = tuple(map(operator.index, shape))
    n_batch = operator.index(n_batch)
    n_dense = operator.index(n_dense)
    if n_batch < 0 or n_dense < 0 or n_batch + n_dense > len(shape):
        raise ValueError(
            f"Invalid n_batch={n_batch}, n_dense={n_dense} for shape={shape}")
    n_sparse = len(shape) - n_batch - n_dense
    batch_shape, sparse_shape, dense_shape = map(
        tuple, split_list(shape, [n_batch, n_sparse]))
    batch_size = np.prod(batch_shape)
    sparse_size = np.prod(sparse_shape)
    if not 0 <= nse < sparse_size:
        raise ValueError(
            f"got nse={nse}, expected to be between 0 and {sparse_size}")
    if 0 < nse < 1:
        nse = int(np.ceil(nse * sparse_size))
    nse = operator.index(nse)

    data_shape = batch_shape + (nse, ) + dense_shape
    indices_shape = batch_shape + (nse, n_sparse)

    @vmap
    def _indices(key):
        if not sparse_shape:
            return jnp.empty((nse, n_sparse), dtype=int)
        flat_ind = random.choice(key,
                                 sparse_size,
                                 shape=(nse, ),
                                 replace=not unique_indices)
        return jnp.column_stack(jnp.unravel_index(flat_ind, sparse_shape))

    keys = random.split(key, batch_size + 1)
    data_key, index_keys = keys[0], keys[1:]
    data = generator(data_key, shape=data_shape, dtype=dtype, **kwds)
    indices = _indices(index_keys).reshape(indices_shape).astype(indices_dtype)
    mat = sparse.BCOO((data, indices), shape=shape)
    return mat.sort_indices() if sorted_indices else mat
Example #42
def _get_bin_edges(a, bins, range, weights):
    """
    Compute the bins used internally by `histogram`.

    Parameters
    ==========
    a : ndarray
        Ravelled data array
    bins, range
        Forwarded arguments from `histogram`.
    weights : ndarray, optional
        Ravelled weights array, or None
    Returns
    =======
    bin_edges : ndarray
        Array of bin edges
    uniform_bins : (Number, Number, int):
        The upper bound, lowerbound, and number of bins, used in the optimized
        implementation of `histogram` that works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    if isinstance(bins, str):
        bin_name = bins
        # if `bins` is a string for an automatic method,
        # this will replace it with the number of bins calculated
        if bin_name not in _hist_bin_selectors:
            raise ValueError(
                "{!r} is not a valid estimator for `bins`".format(bin_name))
        if weights is not None:
            raise TypeError("Automated estimation of the number of "
                            "bins is not supported for weighted data")

        first_edge, last_edge = _get_outer_edges(a, range)

        # truncate the range if needed
        if range is not None:
            keep = a >= first_edge
            keep &= a <= last_edge
            if not np.logical_and.reduce(keep):
                a = a[keep]

        if a.size == 0:
            n_equal_bins = 1
        else:
            # Do not call selectors on empty arrays
            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
            if width:
                n_equal_bins = int(
                    np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
            else:
                # Width can be zero for some estimators, e.g. FD when
                # the IQR of the data is zero.
                n_equal_bins = 1

    elif np.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError as e:
            raise TypeError(
                "`bins` must be an integer, a string, or an array") from e
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif np.ndim(bins) == 1:
        bin_edges = np.asarray(bins)
        if np.any(bin_edges[:-1] > bin_edges[1:]):
            raise ValueError(
                "`bins` must increase monotonically, when an array")

    else:
        raise ValueError("`bins` must be 1d, when an array")

    if n_equal_bins is not None:
        # bin edges must be computed from the outer edges
        bin_edges = np.linspace(first_edge, last_edge, n_equal_bins + 1,
                                endpoint=True)
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
Example #43
def tabulate_neighbors(L, kind):
    r"""Tabulate the root-2 neighbors on the 3D cubic lattice with PBC.

    Parameters
    ----------
    L : int or length-3 array
        the lattice is L \times L \times L if L is a scalar,
        otherwise L[0]*L[1]*L[2]
    kind : callable or str
        ``kind(site, L)`` returns a list of neighbors of ``site``.

    Returns
    -------
    neighbors : array, shape(Nsite, NUM_NEIGHB + 1)

    Notes
    -----
    
    The sites of the lattice are indexed by a flat index,
    ``site = 0, 1, ..., Nsite-1``, where ``Nsite = L**3``.
    
    The ``neighbors`` array is: ``nk = neighbors[site, 0]`` is the number of neighbors
    of ``site`` and ``neighbors[site, 1:nk+1]`` are the neighbor sites.

    E.g., for the following 2D arrangement, the format is

          4
          |
      2 - 1 - 3
          |
          5

    ``neighbors[1, 0] == 4`` since there are four neighbors, and 
    ``neighbors[1, 1:5] == [2, 3, 4, 5]`` (the order not guaranteed).
    """
    try:
        L = operator.index(L)
        L = (L, ) * dimension(kind)
    except TypeError:
        # TODO: allow 2D w/ kind='sc' and L=(3, 4)
        assert len(L) == dimension(kind)

    if callable(kind):
        get_neighbors = kind
    else:
        get_neighbors = get_neighbors_selector(kind)

    # total # of sites
    Nsite = 1
    for ll in L:
        Nsite *= ll

    # construct lists of neighbors for each site
    n_lst = []
    for site in range(Nsite):
        lst = get_neighbors(site, L)
        n_lst.append([len(lst)] + lst)

    # max coordination number
    max_num_neighb = max([e[0] for e in n_lst])

    # copy neighbors lists into a numpy array (rows with fewer neighbors
    # than the max coordination number are zero-padded at the tail)
    neighb = np.zeros((Nsite, max_num_neighb + 1), dtype=int)
    for site in range(Nsite):
        lst = n_lst[site]
        neighb[site, :len(lst)] = lst

    return neighb
Example #44
def _get_bin_edges(a, bins, range, weights):
    """
    Computes the bins used internally by `histogram`.

    Parameters
    ==========
    a : ndarray
        Ravelled data array
    bins, range
        Forwarded arguments from `histogram`.
    weights : ndarray, optional
        Ravelled weights array, or None

    Returns
    =======
    bin_edges : ndarray
        Array of bin edges
    uniform_bins : (Number, Number, int):
        The upper bound, lowerbound, and number of bins, used in the optimized
        implementation of `histogram` that works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    if isinstance(bins, str):
        bin_name = bins
        # if `bins` is a string for an automatic method,
        # this will replace it with the number of bins calculated
        if bin_name not in _hist_bin_selectors:
            raise ValueError(
                "{!r} is not a valid estimator for `bins`".format(bin_name))
        if weights is not None:
            raise TypeError("Automated estimation of the number of "
                            "bins is not supported for weighted data")

        first_edge, last_edge = _get_outer_edges(a, range)

        # truncate the range if needed
        if range is not None:
            keep = (a >= first_edge)
            keep &= (a <= last_edge)
            if not np.logical_and.reduce(keep):
                a = a[keep]

        if a.size == 0:
            n_equal_bins = 1
        else:
            # Do not call selectors on empty arrays
            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
            if width:
                n_equal_bins = int(
                    np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
            else:
                # Width can be zero for some estimators, e.g. FD when
                # the IQR of the data is zero.
                n_equal_bins = 1

    elif np.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError('`bins` must be an integer, a string, or an array')
        if n_equal_bins < 1:
            raise ValueError('`bins` must be positive, when an integer')

        first_edge, last_edge = _get_outer_edges(a, range)

    elif np.ndim(bins) == 1:
        bin_edges = np.asarray(bins)
        if np.any(bin_edges[:-1] > bin_edges[1:]):
            raise ValueError(
                '`bins` must increase monotonically, when an array')

    else:
        raise ValueError('`bins` must be 1d, when an array')

    if n_equal_bins is not None:
        # gh-10322 means that type resolution rules are dependent on array
        # shapes. To avoid this causing problems, we pick a type now and stick
        # with it throughout.
        bin_type = np.result_type(first_edge, last_edge, a)
        if np.issubdtype(bin_type, np.integer):
            bin_type = np.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = np.linspace(first_edge,
                                last_edge,
                                n_equal_bins + 1,
                                endpoint=True,
                                dtype=bin_type)
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
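
The helper above is numpy-internal; the equivalent public entry point is np.histogram_bin_edges, whose integer-bins branch reduces to exactly the linspace computation at the end:

import numpy as np

print(np.histogram_bin_edges([0., 1., 2., 3.], bins=4))
# [0.   0.75  1.5   2.25  3.  ]
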
Example #45
 def overlap(self, overlap):
     self['OVERLAP'] = operator.index(overlap)
Example #46
def as_integer_or_none(value):
    return None if value is None else operator.index(value)
Example #47
def _canonicalize_dimension(dim):
    if type(dim) in _DIMENSION_TYPES:
        return dim
    else:
        return operator.index(dim)
Example #48
def linspace(start,
             stop,
             num=50,
             endpoint=True,
             retstep=False,
             dtype=None,
             axis=0):
    """
    Return evenly spaced numbers over a specified interval.

    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].

    The endpoint of the interval can optionally be excluded.

    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.

    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that `stop` is excluded.  Note that the step
        size changes when `endpoint` is False.
    num : int, optional
        Number of samples to generate. Default is 50. Must be non-negative.
    endpoint : bool, optional
        If True, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    retstep : bool, optional
        If True, return (`samples`, `step`), where `step` is the spacing
        between samples.
    dtype : dtype, optional
        The type of the output array.  If `dtype` is not given, infer the data
        type from the other input arguments.

        .. versionadded:: 1.9.0

    axis : int, optional
        The axis in the result to store the samples.  Relevant only if start
        or stop are array-like.  By default (0), the samples will be along a
        new axis inserted at the beginning. Use -1 to get an axis at the end.

        .. versionadded:: 1.16.0

    Returns
    -------
    samples : ndarray
        There are `num` equally spaced samples in the closed interval
        ``[start, stop]`` or the half-open interval ``[start, stop)``
        (depending on whether `endpoint` is True or False).
    step : float, optional
        Only returned if `retstep` is True

        Size of spacing between samples.


    See Also
    --------
    arange : Similar to `linspace`, but uses a step size (instead of the
             number of samples).
    geomspace : Similar to `linspace`, but with numbers spaced evenly on a log
                scale (a geometric progression).
    logspace : Similar to `geomspace`, but with the end points specified as
               logarithms.

    Examples
    --------
    >>> np.linspace(2.0, 3.0, num=5)
    array([2.  , 2.25, 2.5 , 2.75, 3.  ])
    >>> np.linspace(2.0, 3.0, num=5, endpoint=False)
    array([2. ,  2.2,  2.4,  2.6,  2.8])
    >>> np.linspace(2.0, 3.0, num=5, retstep=True)
    (array([2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)

    Graphical illustration:

    >>> import matplotlib.pyplot as plt
    >>> N = 8
    >>> y = np.zeros(N)
    >>> x1 = np.linspace(0, 10, N, endpoint=True)
    >>> x2 = np.linspace(0, 10, N, endpoint=False)
    >>> plt.plot(x1, y, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.plot(x2, y + 0.5, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.ylim([-0.5, 1])
    (-0.5, 1)
    >>> plt.show()

    """
    try:
        num = operator.index(num)
    except TypeError:
        raise TypeError(
            "object of type {} cannot be safely interpreted as an integer.".
            format(type(num)))

    if num < 0:
        raise ValueError("Number of samples, %s, must be non-negative." % num)
    div = (num - 1) if endpoint else num

    # Convert float/complex array scalars to float, gh-3504
    # and make sure one can use variables that have an __array_interface__, gh-6634
    start = asanyarray(start) * 1.0
    stop = asanyarray(stop) * 1.0

    dt = result_type(start, stop, float(num))
    if dtype is None:
        dtype = dt

    delta = stop - start
    y = _nx.arange(0, num, dtype=dt).reshape((-1, ) + (1, ) * ndim(delta))
    # In-place multiplication y *= delta/div is faster, but prevents the multiplicant
    # from overriding what class is produced, and thus prevents, e.g. use of Quantities,
    # see gh-7142. Hence, we multiply in place only for standard scalar types.
    _mult_inplace = _nx.isscalar(delta)
    if div > 0:
        step = delta / div
        if _nx.any(step == 0):
            # Special handling for denormal numbers, gh-5437
            y /= div
            if _mult_inplace:
                y *= delta
            else:
                y = y * delta
        else:
            if _mult_inplace:
                y *= step
            else:
                y = y * step
    else:
        # sequences with 0 items or 1 item with endpoint=True (i.e. div <= 0)
        # have an undefined step
        step = NaN
        # Multiply with delta to allow possible override of output class.
        y = y * delta

    y += start

    if endpoint and num > 1:
        y[-1] = stop

    if axis != 0:
        y = _nx.moveaxis(y, 0, axis)

    if retstep:
        return y.astype(dtype, copy=False), step
    else:
        return y.astype(dtype, copy=False)
Example #49
def _quadratic_assignment_faq(
    A: np.ndarray,
    B: np.ndarray,
    maximize: bool = False,
    partial_match: Optional[np.ndarray] = None,
    S: Optional[np.ndarray] = None,
    rng: Optional[Union[int, np.random.RandomState,
                        np.random.Generator]] = None,
    P0: Union[Literal["barycenter", "randomized"], np.ndarray] = "barycenter",
    shuffle_input: bool = False,
    maxiter: int = 30,
    tol: float = 0.03,
) -> OptimizeResult:
    r"""
    Solve the quadratic assignment problem (approximately).
    This function solves the Quadratic Assignment Problem (QAP) and the
    Graph Matching Problem (GMP) using the Fast Approximate QAP Algorithm
    (FAQ) [1]_.
    Quadratic assignment solves problems of the following form:
    .. math::
        \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
        \mbox{s.t. } & {P \in \mathcal{P}}\\
    where :math:`\mathcal{P}` is the set of all permutation matrices,
    and :math:`A` and :math:`B` are square matrices.
    Graph matching tries to *maximize* the same objective function.
    This algorithm can be thought of as finding the alignment of the
    nodes of two graphs that minimizes the number of induced edge
    disagreements, or, in the case of weighted graphs, the sum of squared
    edge weight differences.
    Note that the quadratic assignment problem is NP-hard, is not
    known to be solvable in polynomial time, and is computationally
    intractable. Therefore, the results given are approximations,
    not guaranteed to be exact solutions.
    Parameters
    ----------
    A : 2d-array, square
        The square matrix :math:`A` in the objective function above.
    B : 2d-array, square
        The square matrix :math:`B` in the objective function above.
    method :  str in {'faq', '2opt'} (default: 'faq')
        The algorithm used to solve the problem. This is the method-specific
        documentation for 'faq'.
        :ref:`'2opt' <optimize.qap-2opt>` is also available.
    Options
    -------
    maximize : bool (default = False)
        Setting `maximize` to ``True`` solves the Graph Matching Problem (GMP)
        rather than the Quadratic Assignment Problem (QAP). This is
        accomplished through trivial negation of the objective function.
    rng : {None, int, `~np.random.RandomState`, `~np.random.Generator`}
        This parameter defines the object to use for drawing random
        variates.
        If `rng` is ``None`` the `~np.random.RandomState` singleton is
        used.
        If `rng` is an int, a new ``RandomState`` instance is used,
        seeded with `rng`.
        If `rng` is already a ``RandomState`` or ``Generator``
        instance, then that object is used.
        Default is None.
    partial_match : 2d-array of integers, optional, (default = None)
        Allows the user to fix part of the matching between the two
        matrices. In the literature, a partial match is also known as a
        "seed".
        Each row of `partial_match` specifies the indices of a pair of
        corresponding nodes, that is, node ``partial_match[i, 0]`` of `A` is
        matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
        ``partial_match`` is an array of size ``(m, 2)``, where ``m`` is
        not greater than the number of nodes, :math:`n`.
    S : 2d-array, square
        A similarity matrix. Should be the same shape as ``A`` and ``B``.
        Note: the scale of `S` may affect the weight placed on the term
        :math:`\text{trace}(S^T P)` relative to :math:`\text{trace}(A^T P B P^T)`
        during the optimization process.
    P0 : 2d-array, "barycenter", or "randomized" (default = "barycenter")
        The initial (guess) permutation matrix or search "position"
        `P0`.
        `P0` need not be a proper permutation matrix;
        however, it must be :math:`m' \times m'`, where :math:`m' = n - m`,
        and it must be doubly stochastic: each of its rows and columns must
        sum to 1.
        If unspecified or ``"barycenter"``, the non-informative "flat
        doubly stochastic matrix" :math:`J = 1*1^T/m'`, where :math:`1` is
        a :math:`m' \times 1` array of ones, is used. This is the "barycenter"
        of the search space of doubly-stochastic matrices.
        If ``"randomized"``, the algorithm will start from the
        randomized initial search position :math:`P_0 = (J + K)/2`,
        where :math:`J` is the "barycenter" and :math:`K` is a random
        doubly stochastic matrix.
    shuffle_input : bool (default = False)
        To avoid artificially high or low matching due to inherent
        sorting of input matrices, gives users the option
        to shuffle the nodes. Results are then unshuffled so that the
        returned results correspond with the node order of inputs.
        Shuffling may cause the algorithm to be non-deterministic,
        unless a random seed is set or an `rng` option is provided.
    maxiter : int, positive (default = 30)
        Integer specifying the maximum number of Frank-Wolfe iterations performed.
    tol : float (default = 0.03)
        A threshold for the stopping criterion. Frank-Wolfe
        iteration terminates when the change in search position between
        iterations is sufficiently small, that is, when the relative Frobenius
        norm, :math:`\frac{\|P_{i}-P_{i+1}\|_F}{\sqrt{\mathrm{len}(P_{i})}} \leq \mathrm{tol}`,
        where :math:`i` is the iteration number.
    Returns
    -------
    res : OptimizeResult
        A :class:`scipy.optimize.OptimizeResult` containing the following
        fields.
        col_ind : 1-D array
            An array of column indices corresponding with the best
            permutation of the nodes of `B` found.
        fun : float
            The corresponding value of the objective function.
        nit : int
            The number of Frank-Wolfe iterations performed.
    Notes
    -----
    The algorithm may be sensitive to the initial permutation matrix (or
    search "position") due to the possibility of several local minima
    within the feasible region. A barycenter initialization is more likely to
    result in a better solution than a single random initialization. However,
    calling ``quadratic_assignment`` several times with different random
    initializations may result in a better optimum at the cost of longer
    total execution time.
    Examples
    --------
    As mentioned above, a barycenter initialization often results in a better
    solution than a single random initialization.
    >>> np.random.seed(0)
    >>> n = 15
    >>> A = np.random.rand(n, n)
    >>> B = np.random.rand(n, n)
    >>> res = quadratic_assignment(A, B)  # FAQ is default method
    >>> print(res.fun)
    46.871483385480545 # may vary
    >>> options = {"P0": "randomized"}  # use randomized initialization
    >>> res = quadratic_assignment(A, B, options=options)
    >>> print(res.fun)
    47.224831071310625 # may vary
    However, consider running from several randomized initializations and
    keeping the best result.
    >>> res = min([quadratic_assignment(A, B, options=options)
    ...            for i in range(30)], key=lambda x: x.fun)
    >>> print(res.fun)
    46.671852533681516 # may vary
    The '2opt' method can be used to further refine the results.
    >>> options = {"partial_guess": np.array([np.arange(n), res.col_ind]).T}
    >>> res = quadratic_assignment(A, B, method="2opt", options=options)
    >>> print(res.fun)
    46.47160735721583 # may vary
    References
    ----------
    .. [1] J.T. Vogelstein, J.M. Conroy, V. Lyzinski, L.J. Podrazik,
           S.G. Kratzer, E.T. Harley, D.E. Fishkind, R.J. Vogelstein, and
           C.E. Priebe, "Fast approximate quadratic programming for graph
           matching," PLOS one, vol. 10, no. 4, p. e0121002, 2015,
           :doi:`10.1371/journal.pone.0121002`
    .. [2] D. Fishkind, S. Adali, H. Patsolic, L. Meng, D. Singh, V. Lyzinski,
           C. Priebe, "Seeded graph matching", Pattern Recognit. 87 (2019):
           203-215, :doi:`10.1016/j.patcog.2018.09.014`
    """

    maxiter = operator.index(maxiter)

    # ValueError check
    A, B, partial_match_value = _common_input_validation(A, B, partial_match)

    msg = None
    if isinstance(P0, str) and P0 not in {"barycenter", "randomized"}:
        msg = "Invalid 'P0' parameter string"
    elif maxiter <= 0:
        msg = "'maxiter' must be a positive integer"
    elif tol <= 0:
        msg = "'tol' must be a positive float"
    if msg is not None:
        raise ValueError(msg)

    if not isinstance(S, np.ndarray):
        raise ValueError("`S` must be an ndarray")
    elif S.shape != (S.shape[0], S.shape[0]):
        raise ValueError("`S` must be square")
    elif S.shape != A.shape:
        raise ValueError("`S`, `A`, and `B` matrices must be of equal size")
    else:
        s_value = S

    rng = check_random_state(rng)
    n = A.shape[0]  # number of vertices in graphs
    n_seeds = partial_match_value.shape[0]  # number of seeds
    n_unseed = n - n_seeds

    # check outlier cases
    if n == 0 or partial_match_value.shape[0] == n:
        # Cannot assume partial_match is sorted.
        partial_match_value = np.row_stack(
            sorted(partial_match_value, key=lambda x: x[0]))
        score = _calc_score(A, B, s_value, partial_match_value[:, 1])
        res = {"col_ind": partial_match_value[:, 1], "fun": score, "nit": 0}
        return OptimizeResult(res)

    obj_func_scalar = 1
    if maximize:
        obj_func_scalar = -1

    nonseed_B = np.setdiff1d(range(n), partial_match_value[:, 1])
    perm_S = np.copy(nonseed_B)
    if shuffle_input:
        nonseed_B = rng.permutation(nonseed_B)
        # shuffle_input to avoid results from inputs that were already matched

    nonseed_A = np.setdiff1d(range(n), partial_match_value[:, 0])
    perm_A = np.concatenate([partial_match_value[:, 0], nonseed_A])
    perm_B = np.concatenate([partial_match_value[:, 1], nonseed_B])

    s_value = s_value[:, perm_B]

    # definitions according to Seeded Graph Matching [2].
    A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
    B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
    S22 = s_value[perm_S, n_seeds:]

    # [1] Algorithm 1 Line 1 - choose initialization
    if isinstance(P0, str):
        # initialize J, a doubly stochastic barycenter
        J = np.ones((n_unseed, n_unseed)) / n_unseed
        if P0 == "barycenter":
            P = J
        elif P0 == "randomized":
            # generate an n_unseed x n_unseed matrix with entries uniform on [0, 1)
            # would use rand, but Generators don't have it
            # would use random, but old mtrand.RandomStates don't have it
            K = rng.uniform(size=(n_unseed, n_unseed))
            # Sinkhorn balancing
            K = _doubly_stochastic(K)
            P = J * 0.5 + K * 0.5
    elif isinstance(P0, np.ndarray):
        P0 = np.atleast_2d(P0)
        _check_init_input(P0, n_unseed)
        invert_inds = np.argsort(nonseed_B)
        perm_nonseed_B = np.argsort(invert_inds)
        P = P0[:, perm_nonseed_B]
    else:
        msg = "`init` must either be of type str or np.ndarray."
        raise TypeError(msg)

    const_sum = A21 @ B21.T + A12.T @ B12 + S22

    # [1] Algorithm 1 Line 2 - loop while stopping criteria not met
    for n_iter in range(1, maxiter + 1):
        # [1] Algorithm 1 Line 3 - compute the gradient of f(P) = -tr(APB^tP^t)
        grad_fp = const_sum + A22 @ P @ B22.T + A22.T @ P @ B22
        # [1] Algorithm 1 Line 4 - get direction Q by solving Eq. 8
        _, cols = linear_sum_assignment(grad_fp, maximize=maximize)
        Q = np.eye(n_unseed)[cols]

        # [1] Algorithm 1 Line 5 - compute the step size
        # Noting that e.g. trace(Ax) = trace(A)*x, expand and re-collect
        # terms as ax**2 + bx + c. c does not affect location of minimum
        # and can be ignored. Also, note that trace(A@B) = (A.T*B).sum();
        # apply where possible for efficiency.
        R = P - Q
        b21 = ((R.T @ A21) * B21).sum()
        b12 = ((R.T @ A12.T) * B12.T).sum()
        AR22 = A22.T @ R
        BR22 = B22 @ R.T
        b22a = (AR22 * B22.T[cols]).sum()
        b22b = (A22 * BR22[cols]).sum()
        s = (S22 * R).sum()
        a = (AR22.T * BR22).sum()
        b = b21 + b12 + b22a + b22b + s
        # critical point of ax^2 + bx + c is at x = -b/(2*a)
        # if a * obj_func_scalar > 0, it is a minimum
        # if minimum is not in [0, 1], only endpoints need to be considered
        if a * obj_func_scalar > 0 and 0 <= -b / (2 * a) <= 1:
            alpha = -b / (2 * a)
        else:
            alpha = np.argmin([0, (b + a) * obj_func_scalar])

        # [1] Algorithm 1 Line 6 - Update P
        P_i1 = alpha * P + (1 - alpha) * Q
        if np.linalg.norm(P - P_i1) / np.sqrt(n_unseed) < tol:
            P = P_i1
            break
        P = P_i1
    # [1] Algorithm 1 Line 7 - end main loop

    # [1] Algorithm 1 Line 8 - project onto the set of permutation matrices
    _, col = linear_sum_assignment(-P)
    perm = np.concatenate((np.arange(n_seeds), col + n_seeds))

    unshuffled_perm = np.zeros(n, dtype=int)
    unshuffled_perm[perm_A] = perm_B[perm]

    score = _calc_score(A, B, s_value, unshuffled_perm)

    res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}

    return OptimizeResult(res)
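
The ``_doubly_stochastic`` helper used for the randomized start is not shown in this snippet. A common way to produce such a matrix is Sinkhorn balancing, which alternately normalizes rows and columns; the sketch below illustrates that idea and is an assumption about the helper, not the SciPy source:

import numpy as np

def sinkhorn(K, max_iter=1000, tol=1e-8):
    # Assumed behavior: alternately normalize rows and columns until
    # both sum to (approximately) 1, i.e. K becomes doubly stochastic.
    P = K / K.sum()
    for _ in range(max_iter):
        P = P / P.sum(axis=1, keepdims=True)   # rows sum to 1
        P = P / P.sum(axis=0, keepdims=True)   # columns sum to 1
        if np.allclose(P.sum(axis=1), 1, atol=tol):
            break
    return P

rng = np.random.default_rng(0)
P = sinkhorn(rng.uniform(size=(4, 4)))
print(P.sum(axis=0), P.sum(axis=1))  # both close to [1, 1, 1, 1]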
Example #50
def test_uid_index(self):
    self.assertEqual(operator.index(UID(1)), 1)
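
For this test to pass, ``UID`` only needs to implement ``__index__``. A hypothetical minimal version (the real class is not part of this snippet):

import operator

class UID:
    # Hypothetical: a UID is integer-like via __index__.
    def __init__(self, value):
        self._value = value

    def __index__(self):
        return self._value

assert operator.index(UID(1)) == 1
assert range(10)[UID(3)] == 3   # __index__ also enables use as an index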
Example #51
def make_interp_spline(x, y, k=3, t=None, bc_type=None, axis=0,
                       check_finite=True):
    """Compute the (coefficients of) interpolating B-spline.

    Parameters
    ----------
    x : array_like, shape (n,)
        Abscissas.
    y : array_like, shape (n, ...)
        Ordinates.
    k : int, optional
        B-spline degree. Default is cubic, k=3.
    t : array_like, shape (nt + k + 1,), optional
        Knots.
        The number of knots needs to agree with the number of datapoints and
        the number of derivatives at the edges. Specifically, ``nt - n`` must
        equal ``len(deriv_l) + len(deriv_r)``.
    bc_type : 2-tuple or None
        Boundary conditions.
        Default is None, which means choosing the boundary conditions
        automatically. Otherwise, it must be a length-two tuple where the first
        element sets the boundary conditions at ``x[0]`` and the second
        element sets the boundary conditions at ``x[-1]``. Each of these must
        be an iterable of pairs ``(order, value)`` which gives the values of
        derivatives of specified orders at the given edge of the interpolation
        interval.
        Alternatively, the following string aliases are recognized:

        * ``"clamped"``: The first derivatives at the ends are zero. This is
           equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``.
        * ``"natural"``: The second derivatives at ends are zero. This is
          equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``.
        * ``"not-a-knot"`` (default): The first and second segments are the same
          polynomial. This is equivalent to having ``bc_type=None``.

    axis : int, optional
        Interpolation axis. Default is 0.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : BSpline
        A BSpline object of degree ``k`` with knots ``t``.

    Examples
    --------

    Use cubic interpolation on Chebyshev nodes:

    >>> def cheb_nodes(N):
    ...     jj = 2.*np.arange(N) + 1
    ...     x = np.cos(np.pi * jj / 2 / N)[::-1]
    ...     return x

    >>> x = cheb_nodes(20)
    >>> y = np.sqrt(1 - x**2)

    >>> from scipy.interpolate import BSpline, make_interp_spline
    >>> b = make_interp_spline(x, y)
    >>> np.allclose(b(x), y)
    True

    Note that the default is a cubic spline with a not-a-knot boundary condition

    >>> b.k
    3

    Here we use a 'natural' spline, with zero 2nd derivatives at edges:

    >>> l, r = [(2, 0.0)], [(2, 0.0)]
    >>> b_n = make_interp_spline(x, y, bc_type=(l, r))  # or, bc_type="natural"
    >>> np.allclose(b_n(x), y)
    True
    >>> x0, x1 = x[0], x[-1]
    >>> np.allclose([b_n(x0, 2), b_n(x1, 2)], [0, 0])
    True

    Interpolation of parametric curves is also supported. As an example, we
    compute a discretization of a snail curve in polar coordinates

    >>> phi = np.linspace(0, 2.*np.pi, 40)
    >>> r = 0.3 + np.cos(phi)
    >>> x, y = r*np.cos(phi), r*np.sin(phi)  # convert to Cartesian coordinates

    Build an interpolating curve, parameterizing it by the angle

    >>> from scipy.interpolate import make_interp_spline
    >>> spl = make_interp_spline(phi, np.c_[x, y])

    Evaluate the interpolant on a finer grid (note that we transpose the result
    to unpack it into a pair of x- and y-arrays)

    >>> phi_new = np.linspace(0, 2.*np.pi, 100)
    >>> x_new, y_new = spl(phi_new).T

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.plot(x, y, 'o')
    >>> plt.plot(x_new, y_new, '-')
    >>> plt.show()

    See Also
    --------
    BSpline : base class representing the B-spline objects
    CubicSpline : a cubic spline in the polynomial basis
    make_lsq_spline : a similar factory function for spline fitting
    UnivariateSpline : a wrapper over FITPACK spline fitting routines
    splrep : a wrapper over FITPACK spline fitting routines

    """
    # convert string aliases for the boundary conditions
    if bc_type is None or bc_type == 'not-a-knot':
        deriv_l, deriv_r = None, None
    elif isinstance(bc_type, str):
        deriv_l, deriv_r = bc_type, bc_type
    else:
        try:
            deriv_l, deriv_r = bc_type
        except TypeError:
            raise ValueError("Unknown boundary condition: %s" % bc_type)

    y = np.asarray(y)

    axis = normalize_axis_index(axis, y.ndim)

    # special-case k=0 right away
    if k == 0:
        if any(_ is not None for _ in (t, deriv_l, deriv_r)):
            raise ValueError("Too much info for k=0: t and bc_type can only "
                             "be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
    if k == 1 and t is None:
        if not (deriv_l is None and deriv_r is None):
            raise ValueError("Too much info for k=1: bc_type can only be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x[0], x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    k = operator.index(k)

    # come up with a sensible knot vector, if needed
    if t is None:
        if deriv_l is None and deriv_r is None:
            if k == 2:
                # OK, it's a bit ad hoc: Greville sites + omit
                # 2nd and 2nd-to-last points, a la not-a-knot
                t = (x[1:] + x[:-1]) / 2.
                t = np.r_[(x[0],)*(k+1),
                           t[1:-1],
                           (x[-1],)*(k+1)]
            else:
                t = _not_a_knot(x, k)
        else:
            t = _augknt(x, k)

    t = _as_float_array(t, check_finite)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] < x[:-1]):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if np.any(x[1:] == x[:-1]):
        raise ValueError("Expect x to not have duplicates")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] < t[:-1]):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('x and y are incompatible.')
    if t.size < x.size + k + 1:
        raise ValueError('Got %d knots, need at least %d.' %
                         (t.size, x.size + k + 1))
    if (x[0] < t[k]) or (x[-1] > t[-k]):
        raise ValueError('Out of bounds w/ x = %s.' % x)

    # Here : deriv_l, r = [(nu, value), ...]
    deriv_l = _convert_string_aliases(deriv_l, y.shape[1:])
    deriv_l_ords, deriv_l_vals = _process_deriv_spec(deriv_l)
    nleft = deriv_l_ords.shape[0]

    deriv_r = _convert_string_aliases(deriv_r, y.shape[1:])
    deriv_r_ords, deriv_r_vals = _process_deriv_spec(deriv_r)
    nright = deriv_r_ords.shape[0]

    # have `n` conditions for `nt` coefficients; need nt-n derivatives
    n = x.size
    nt = t.size - k - 1

    if nt - n != nleft + nright:
        raise ValueError("The number of derivatives at boundaries does not "
                         "match: expected %s, got %s+%s" % (nt-n, nleft, nright))

    # set up the LHS: the collocation matrix + derivatives at boundaries
    kl = ku = k
    ab = np.zeros((2*kl + ku + 1, nt), dtype=np.float_, order='F')
    _bspl._colloc(x, t, k, ab, offset=nleft)
    if nleft > 0:
        _bspl._handle_lhs_derivatives(t, k, x[0], ab, kl, ku, deriv_l_ords)
    if nright > 0:
        _bspl._handle_lhs_derivatives(t, k, x[-1], ab, kl, ku, deriv_r_ords,
                                offset=nt-nright)

    # set up the RHS: values to interpolate (+ derivative values, if any)
    extradim = prod(y.shape[1:])
    rhs = np.empty((nt, extradim), dtype=y.dtype)
    if nleft > 0:
        rhs[:nleft] = deriv_l_vals.reshape(-1, extradim)
    rhs[nleft:nt - nright] = y.reshape(-1, extradim)
    if nright > 0:
        rhs[nt - nright:] = deriv_r_vals.reshape(-1, extradim)

    # solve Ab @ x = rhs; this is the relevant part of linalg.solve_banded
    if check_finite:
        ab, rhs = map(np.asarray_chkfinite, (ab, rhs))
    gbsv, = get_lapack_funcs(('gbsv',), (ab, rhs))
    lu, piv, c, info = gbsv(kl, ku, ab, rhs,
            overwrite_ab=True, overwrite_b=True)

    if info > 0:
        raise LinAlgError("Collocation matix is singular.")
    elif info < 0:
        raise ValueError('illegal value in %d-th argument of internal gbsv' % -info)

    c = np.ascontiguousarray(c.reshape((nt,) + y.shape[1:]))
    return BSpline.construct_fast(t, c, k, axis=axis)
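
Note that ``k = operator.index(k)`` makes the degree check strict: integer-like objects such as NumPy integer scalars pass, while floats are rejected even when integral. A quick illustration:

import operator
import numpy as np

assert operator.index(np.int64(3)) == 3   # integer-like: accepted
try:
    operator.index(3.0)                   # float degree: rejected
except TypeError as exc:
    print("rejected:", exc)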
Example #52
File: search.py Project: mrcrabby/sunburnt
    def __getitem__(self, k):
        """Return a single result or slice of results from the query.
        """
        # NOTE: only supports the default result constructor.

        # are we already paginated? if so, we'll apply this getitem to the
        # paginated result - else we'll apply it to the whole.
        offset = 0 if self.paginator.start is None else self.paginator.start

        if isinstance(k, slice):
            # calculate solr pagination options for the requested slice
            step = operator.index(k.step) if k.step is not None else 1
            if step == 0:
                raise ValueError("slice step cannot be zero")
            if step > 0:
                s1 = k.start
                s2 = k.stop
                inc = 0
            else:
                s1 = k.stop
                s2 = k.start
                inc = 1

            if s1 is not None:
                start = operator.index(s1)
                if start < 0:
                    start += self.count()
                    start = max(0, start)
                start += inc
            else:
                start = 0
            if s2 is not None:
                stop = operator.index(s2)
                if stop < 0:
                    stop += self.count()
                    stop = max(0, stop)
                stop += inc
            else:
                stop = self.count()

            rows = stop - start
            if self.paginator.rows is not None:
                rows = min(rows, self.paginator.rows)

            if rows <= 0:
                return []

            start += offset

            return self.paginate(start=start, rows=rows).execute()[::step]

        else:
            # if not a slice, a single result is being requested
            k = operator.index(k)
            if k < 0:
                k += self.count()
                if k < 0:
                    raise IndexError("list index out of range")

            # Otherwise do the query anyway, don't count() to avoid extra Solr call
            k += offset
            response = self.paginate(start=k, rows=1).execute()
            if response.result.numFound < k:
                raise IndexError("list index out of range")
            return response.result.docs[0]
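
Much of the start/stop normalization above mirrors what Python's built-in ``slice.indices`` does once a length is known; the snippet cannot use it directly because it defers the ``count()`` call, but the arithmetic is the same:

s = slice(-10, None, 2)
print(s.indices(25))                  # (15, 25, 2): negatives resolved
print(slice(None, -3).indices(25))    # (0, 22, 1)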
Example #53
File: api_util.py Project: yashk2810/jax
def _ensure_index_tuple(x: Any) -> Tuple[int, ...]:
    """Convert x to a tuple of indices."""
    try:
        return (operator.index(x), )
    except TypeError:
        return tuple(map(operator.index, x))
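
Expected behavior, assuming the usual ``operator.index`` semantics: a scalar becomes a one-element tuple, and an iterable of integer-likes becomes a tuple of plain ints (the scalar branch raises TypeError first, triggering the fallback):

assert _ensure_index_tuple(3) == (3,)
assert _ensure_index_tuple([0, 2]) == (0, 2)
assert _ensure_index_tuple(range(3)) == (0, 1, 2)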
Example #54
def test_basic(self):
    self.o.ind = -2
    self.n.ind = 2
    self.assertEqual(operator.index(self.o), -2)
    self.assertEqual(operator.index(self.n), 2)
Example #55
File: api_util.py Project: yashk2810/jax
def _ensure_index(x: Any) -> Union[int, Tuple[int, ...]]:
    """Ensure x is either an index or a tuple of indices."""
    try:
        return operator.index(x)
    except TypeError:
        return tuple(map(operator.index, x))
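
Unlike ``_ensure_index_tuple`` above, this variant keeps a scalar as a bare int and only falls back to a tuple for iterables:

assert _ensure_index(3) == 3             # scalar stays an int
assert _ensure_index([0, 1]) == (0, 1)   # iterable becomes a tuple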
Example #56
    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__"""

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # There is a known problem when dealing with [..., ...] parameters,
        # but that is a rather odd way to pass parameters anyway.
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                for diml in range(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif is_idx(key):
                key = operator.index(key)

                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._process_range(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(
                    key.start, key.stop, key.step, dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in range(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in range(len(self.shape)):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            # use xrange, since it supports large integers as of Python 2.6
            # see github #181
            new_dim = len(range(startl[dim], stopl[dim], stepl[dim]))
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape
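
The Ellipsis branch above fills in the dimensions that a key tuple leaves implicit. The same expansion can be sketched standalone, assuming a single Ellipsis per key tuple (``expand_ellipsis`` is illustrative, not part of the original code):

def expand_ellipsis(keys, ndim):
    # Replace one Ellipsis with enough full slices to reach ndim entries.
    keys = keys if isinstance(keys, tuple) else (keys,)
    if Ellipsis in keys:
        i = keys.index(Ellipsis)
        fill = (slice(None),) * (ndim - len(keys) + 1)
        keys = keys[:i] + fill + keys[i + 1:]
    return keys

assert expand_ellipsis((0, Ellipsis, 1), 4) == (0, slice(None), slice(None), 1)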
Example #57
    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        _data_matrix.__init__(self)

        if isinstance(arg1, tuple):
            if isshape(arg1):
                M, N = arg1
                self._shape = check_shape((M, N))
                idx_dtype = get_index_dtype(maxval=max(M, N))
                self.row = np.array([], dtype=idx_dtype)
                self.col = np.array([], dtype=idx_dtype)
                self.data = np.array([], dtype=getdtype(dtype, default=float))
                self.has_canonical_format = True
            else:
                try:
                    obj, (row, col) = arg1
                except (TypeError, ValueError) as e:
                    raise TypeError('invalid input format') from e

                if shape is None:
                    if len(row) == 0 or len(col) == 0:
                        raise ValueError('cannot infer dimensions from zero '
                                         'sized index arrays')
                    M = operator.index(np.max(row)) + 1
                    N = operator.index(np.max(col)) + 1
                    self._shape = check_shape((M, N))
                else:
                    # Use 2 steps to ensure shape has length 2.
                    M, N = shape
                    self._shape = check_shape((M, N))

                idx_dtype = get_index_dtype(maxval=max(self.shape))
                self.row = np.array(row, copy=copy, dtype=idx_dtype)
                self.col = np.array(col, copy=copy, dtype=idx_dtype)
                self.data = np.array(obj, copy=copy)
                self.has_canonical_format = False

        else:
            if isspmatrix(arg1):
                if isspmatrix_coo(arg1) and copy:
                    self.row = arg1.row.copy()
                    self.col = arg1.col.copy()
                    self.data = arg1.data.copy()
                    self._shape = check_shape(arg1.shape)
                else:
                    coo = arg1.tocoo()
                    self.row = coo.row
                    self.col = coo.col
                    self.data = coo.data
                    self._shape = check_shape(coo.shape)
                self.has_canonical_format = False
            else:
                #dense argument
                M = np.atleast_2d(np.asarray(arg1))

                if M.ndim != 2:
                    raise TypeError('expected dimension <= 2 array or matrix')

                self._shape = check_shape(M.shape)
                if shape is not None:
                    if check_shape(shape) != self._shape:
                        raise ValueError('inconsistent shapes: %s != %s' %
                                         (shape, self._shape))

                self.row, self.col = M.nonzero()
                self.data = M[self.row, self.col]
                self.has_canonical_format = True

        if dtype is not None:
            self.data = self.data.astype(dtype, copy=False)

        self._check()
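
The shape-inference path (``operator.index(np.max(row)) + 1``) is easy to observe from the public constructor; ``np.max`` returns a NumPy scalar, which ``operator.index`` converts to a plain int:

import numpy as np
from scipy.sparse import coo_matrix

data = np.array([4.0, 5.0, 7.0])
row = np.array([0, 3, 1])
col = np.array([0, 3, 1])
m = coo_matrix((data, (row, col)))   # no shape given
print(m.shape)                       # (4, 4): max index + 1 on each axis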
Example #58
def __index__(self):
    return operator.index(self.__wrapped__)
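
A hypothetical self-contained wrapper built on the same idea; delegating ``__index__`` lets the proxy go anywhere Python expects an integer index:

import operator

class IntProxy:
    # Hypothetical proxy: forwards integer conversion to the wrapped object.
    def __init__(self, wrapped):
        self.__wrapped__ = wrapped

    def __index__(self):
        return operator.index(self.__wrapped__)

assert list(range(IntProxy(3))) == [0, 1, 2]
assert "abcdef"[IntProxy(2)] == "c"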
Example #59
def _parse_einsum_input(operands):
    """
    A reproduction of einsum c side einsum parsing in python.

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the numpy contraction

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> np.random.seed(123)
    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    ('za,xza', 'xz', [a, b]) # may vary

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    ('za,xza', 'xz', [a, b]) # may vary
    """

    if len(operands) == 0:
        raise ValueError("No input operands")

    if isinstance(operands[0], str):
        subscripts = operands[0].replace(" ", "")
        operands = [asanyarray(v) for v in operands[1:]]

        # Ensure all characters are valid
        for s in subscripts:
            if s in '.,->':
                continue
            if s not in einsum_symbols:
                raise ValueError("Character %s is not a valid symbol." % s)

    else:
        tmp_operands = list(operands)
        operand_list = []
        subscript_list = []
        for p in range(len(operands) // 2):
            operand_list.append(tmp_operands.pop(0))
            subscript_list.append(tmp_operands.pop(0))

        output_list = tmp_operands[-1] if len(tmp_operands) else None
        operands = [asanyarray(v) for v in operand_list]
        subscripts = ""
        last = len(subscript_list) - 1
        for num, sub in enumerate(subscript_list):
            for s in sub:
                if s is Ellipsis:
                    subscripts += "..."
                else:
                    try:
                        s = operator.index(s)
                    except TypeError as e:
                        raise TypeError("For this input type lists must contain "
                                        "either int or Ellipsis") from e
                    subscripts += einsum_symbols[s]
            if num != last:
                subscripts += ","

        if output_list is not None:
            subscripts += "->"
            for s in output_list:
                if s is Ellipsis:
                    subscripts += "..."
                else:
                    try:
                        s = operator.index(s)
                    except TypeError as e:
                        raise TypeError("For this input type lists must contain "
                                        "either int or Ellipsis") from e
                    subscripts += einsum_symbols[s]
    # Check for proper "->"
    if ("-" in subscripts) or (">" in subscripts):
        invalid = (subscripts.count("-") > 1) or (subscripts.count(">") > 1)
        if invalid or (subscripts.count("->") != 1):
            raise ValueError("Subscripts can only contain one '->'.")

    # Parse ellipses
    if "." in subscripts:
        used = subscripts.replace(".", "").replace(",", "").replace("->", "")
        unused = list(einsum_symbols_set - set(used))
        ellipse_inds = "".join(unused)
        longest = 0

        if "->" in subscripts:
            input_tmp, output_sub = subscripts.split("->")
            split_subscripts = input_tmp.split(",")
            out_sub = True
        else:
            split_subscripts = subscripts.split(',')
            out_sub = False

        for num, sub in enumerate(split_subscripts):
            if "." in sub:
                if (sub.count(".") != 3) or (sub.count("...") != 1):
                    raise ValueError("Invalid Ellipses.")

                # Take into account numerical values
                if operands[num].shape == ():
                    ellipse_count = 0
                else:
                    ellipse_count = max(operands[num].ndim, 1)
                    ellipse_count -= (len(sub) - 3)

                if ellipse_count > longest:
                    longest = ellipse_count

                if ellipse_count < 0:
                    raise ValueError("Ellipses lengths do not match.")
                elif ellipse_count == 0:
                    split_subscripts[num] = sub.replace('...', '')
                else:
                    rep_inds = ellipse_inds[-ellipse_count:]
                    split_subscripts[num] = sub.replace('...', rep_inds)

        subscripts = ",".join(split_subscripts)
        if longest == 0:
            out_ellipse = ""
        else:
            out_ellipse = ellipse_inds[-longest:]

        if out_sub:
            subscripts += "->" + output_sub.replace("...", out_ellipse)
        else:
            # Special care for outputless ellipses
            output_subscript = ""
            tmp_subscripts = subscripts.replace(",", "")
            for s in sorted(set(tmp_subscripts)):
                if s not in einsum_symbols:
                    raise ValueError("Character %s is not a valid symbol." % s)
                if tmp_subscripts.count(s) == 1:
                    output_subscript += s
            normal_inds = ''.join(sorted(set(output_subscript) -
                                         set(out_ellipse)))

            subscripts += "->" + out_ellipse + normal_inds

    # Build the output string if it does not exist
    if "->" in subscripts:
        input_subscripts, output_subscript = subscripts.split("->")
    else:
        input_subscripts = subscripts
        # Build output subscripts
        tmp_subscripts = subscripts.replace(",", "")
        output_subscript = ""
        for s in sorted(set(tmp_subscripts)):
            if s not in einsum_symbols:
                raise ValueError("Character %s is not a valid symbol." % s)
            if tmp_subscripts.count(s) == 1:
                output_subscript += s

    # Make sure output subscripts are in the input
    for char in output_subscript:
        if char not in input_subscripts:
            raise ValueError("Output character %s did not appear in the input"
                             % char)

    # Make sure the number of operands matches the number of terms
    if len(input_subscripts.split(',')) != len(operands):
        raise ValueError("Number of einsum subscripts must be equal to the "
                         "number of operands.")

    return (input_subscripts, output_subscript, operands)
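
The interleaved (operand, subscript-list) calling convention that this parser normalizes is the same one ``np.einsum`` accepts publicly, with integers standing in for subscript letters and ``Ellipsis`` for ``...``:

import numpy as np

a = np.random.rand(4, 4)
b = np.random.rand(4, 4, 4)
# Equivalent to np.einsum('...a,...a->...', a, b):
out = np.einsum(a, [Ellipsis, 0], b, [Ellipsis, 0])
print(out.shape)   # (4, 4) after broadcasting the two ellipses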
Example #60
def binned_statistic_dd(sample,
                        values,
                        statistic='mean',
                        bins=10,
                        range=None,
                        expand_binnumbers=False,
                        binned_statistic_result=None):
    """
    Compute a multidimensional binned statistic for a set of data.

    This is a generalization of the histogramdd function.  A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin.  This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like
        Data to histogram passed as a sequence of N arrays of length D, or
        as an (N,D) array.
    values : (N,) array_like or list of (N,) array_like
        The data on which the statistic will be computed.  This must be
        the same shape as `sample`, or a list of sequences - each with the
        same shape as `sample`.  If `values` is such a list, the statistic
        will be computed on each independently.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * 'std' : compute the standard deviation within each bin. This
            is implicitly calculated with ddof=0. If the number of values
            within a given bin is 0 or 1, the computed standard deviation value
            will be 0 for the bin.
          * 'min' : compute the minimum of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'max' : compute the maximum of values for points within each bin.
            Empty bins will be represented by NaN.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : sequence or positive int, optional
        The bin specification must be in one of the following forms:

          * A sequence of arrays describing the bin edges along each dimension.
          * The number of bins for each dimension (nx, ny, ... = bins).
          * The number of bins for all dimensions (nx = ny = ... = bins).
    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.
    expand_binnumbers : bool, optional
        'False' (default): the returned `binnumber` is a shape (N,) array of
        linearized bin indices.
        'True': the returned `binnumber` is 'unraveled' into a shape (D,N)
        ndarray, where each row gives the bin numbers in the corresponding
        dimension.
        See the `binnumber` returned value, and the `Examples` section of
        `binned_statistic_2d`.
    binned_statistic_result : BinnedStatisticddResult
        Result of a previous call to the function in order to reuse bin edges
        and bin numbers with new values and/or a different statistic.
        To reuse bin numbers, `expand_binnumbers` must have been set to False
        (the default).

        .. versionadded:: 0.17.0

    Returns
    -------
    statistic : ndarray, shape(nx1, nx2, nx3,...)
        The values of the selected statistic in each bin.
    bin_edges : list of ndarrays
        A list of D arrays describing the (nxi + 1) bin edges for each
        dimension.
    binnumber : (N,) array of ints or (D,N) ndarray of ints
        This assigns to each element of `sample` an integer that represents the
        bin in which this observation falls.  The representation depends on the
        `expand_binnumbers` argument.  See `Notes` for details.


    See Also
    --------
    numpy.digitize, numpy.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----
    Binedges:
    All but the last (righthand-most) bin is half-open in each dimension.  In
    other words, if `bins` is ``[1, 2, 3, 4]``, then the first bin is
    ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``.  The
    last bin, however, is ``[3, 4]``, which *includes* 4.

    `binnumber`:
    This returned argument assigns to each element of `sample` an integer that
    represents the bin in which it belongs.  The representation depends on the
    `expand_binnumbers` argument. If 'False' (default): The returned
    `binnumber` is a shape (N,) array of linearized indices mapping each
    element of `sample` to its corresponding bin (using row-major ordering).
    If 'True': The returned `binnumber` is a shape (D,N) ndarray where
    each row indicates bin placements for each dimension respectively.  In each
    dimension, a binnumber of `i` means the corresponding value is between
    (bin_edges[D][i-1], bin_edges[D][i]), for each dimension 'D'.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> from scipy import stats
    >>> import matplotlib.pyplot as plt
    >>> from mpl_toolkits.mplot3d import Axes3D

    Take an array of 600 (x, y) coordinates as an example.
    `binned_statistic_dd` can handle arrays of higher dimension `D`, but a
    plot of dimension `D+1` is then required.

    >>> mu = np.array([0., 1.])
    >>> sigma = np.array([[1., -0.5],[-0.5, 1.5]])
    >>> multinormal = stats.multivariate_normal(mu, sigma)
    >>> data = multinormal.rvs(size=600, random_state=235412)
    >>> data.shape
    (600, 2)

    Create bins and count how many arrays fall in each bin:

    >>> N = 60
    >>> x = np.linspace(-3, 3, N)
    >>> y = np.linspace(-3, 4, N)
    >>> ret = stats.binned_statistic_dd(data, np.arange(600), bins=[x, y],
    ...                                 statistic='count')
    >>> bincounts = ret.statistic

    Set the volume and the location of bars:

    >>> dx = x[1] - x[0]
    >>> dy = y[1] - y[0]
    >>> x, y = np.meshgrid(x[:-1]+dx/2, y[:-1]+dy/2)
    >>> z = 0

    >>> bincounts = bincounts.ravel()
    >>> x = x.ravel()
    >>> y = y.ravel()

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111, projection='3d')
    >>> with np.errstate(divide='ignore'):   # silence random axes3d warning
    ...     ax.bar3d(x, y, z, dx, dy, bincounts)

    Reuse bin numbers and bin edges with new values:

    >>> ret2 = stats.binned_statistic_dd(data, -np.arange(600),
    ...                                  binned_statistic_result=ret,
    ...                                  statistic='mean')
    """
    known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max']
    if not callable(statistic) and statistic not in known_stats:
        raise ValueError('invalid statistic %r' % (statistic, ))

    try:
        bins = index(bins)
    except TypeError:
        # bins is not an integer
        pass
    # If bins was an integer-like object, now it is an actual Python int.

    # NOTE: for _bin_edges(), see e.g. gh-11365
    if isinstance(bins, int) and not np.isfinite(sample).all():
        raise ValueError('%r contains non-finite values.' % (sample, ))

    # `Ndim` is the number of dimensions (e.g. `2` for `binned_statistic_2d`)
    # `Dlen` is the length of elements along each dimension.
    # This code is based on np.histogramdd
    try:
        # `sample` is an ND-array.
        Dlen, Ndim = sample.shape
    except (AttributeError, ValueError):
        # `sample` is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        Dlen, Ndim = sample.shape

    # Store initial shape of `values` to preserve it in the output
    values = np.asarray(values)
    input_shape = list(values.shape)
    # Make sure that `values` is 2D to iterate over rows
    values = np.atleast_2d(values)
    Vdim, Vlen = values.shape

    # Make sure `values` match `sample`
    if (statistic != 'count' and Vlen != Dlen):
        raise AttributeError('The number of `values` elements must match the '
                             'length of each `sample` dimension.')

    try:
        M = len(bins)
        if M != Ndim:
            raise AttributeError('The dimension of bins must be equal '
                                 'to the dimension of the sample x.')
    except TypeError:
        bins = Ndim * [bins]

    if binned_statistic_result is None:
        nbin, edges, dedges = _bin_edges(sample, bins, range)
        binnumbers = _bin_numbers(sample, nbin, edges, dedges)
    else:
        edges = binned_statistic_result.bin_edges
        nbin = np.array([len(edges[i]) + 1 for i in builtins.range(Ndim)])
        # +1 for outlier bins
        dedges = [np.diff(edges[i]) for i in builtins.range(Ndim)]
        binnumbers = binned_statistic_result.binnumber

    result = np.empty([Vdim, nbin.prod()], float)

    if statistic == 'mean':
        result.fill(np.nan)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            result[vv, a] = flatsum[a] / flatcount[a]
    elif statistic == 'std':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            for i in np.unique(binnumbers):
                # NOTE: take std dev by bin, np.std() is 2-pass and stable
                binned_data = values[vv, binnumbers == i]
                # compute the std only when a bin holds 2 or more values, for speed
                if len(binned_data) >= 2:
                    result[vv, i] = np.std(binned_data)
    elif statistic == 'count':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = np.arange(len(flatcount))
        result[:, a] = flatcount[np.newaxis, :]
    elif statistic == 'sum':
        result.fill(0)
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            a = np.arange(len(flatsum))
            result[vv, a] = flatsum
    elif statistic == 'median':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.median(values[vv, binnumbers == i])
    elif statistic == 'min':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.min(values[vv, binnumbers == i])
    elif statistic == 'max':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.max(values[vv, binnumbers == i])
    elif callable(statistic):
        with np.errstate(invalid='ignore'), suppress_warnings() as sup:
            sup.filter(RuntimeWarning)
            try:
                null = statistic([])
            except Exception:
                null = np.nan
        result.fill(null)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = statistic(values[vv, binnumbers == i])

    # Shape into a proper matrix
    result = result.reshape(np.append(Vdim, nbin))

    # Remove outliers (indices 0 and -1 for each bin-dimension).
    core = tuple([slice(None)] + Ndim * [slice(1, -1)])
    result = result[core]

    # Unravel binnumbers into an ndarray, each row the bins for each dimension
    if (expand_binnumbers and Ndim > 1):
        binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))

    if np.any(result.shape[1:] != nbin - 2):
        raise RuntimeError('Internal Shape Error')

    # Reshape to have output (`result`) match input (`values`) shape
    result = result.reshape(input_shape[:-1] + list(nbin - 2))

    return BinnedStatisticddResult(result, edges, binnumbers)
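
A minimal usage sketch; note that ``bins`` may be any integer-like object, since the ``index(bins)`` call above normalizes it to a plain Python int:

import numpy as np
from scipy.stats import binned_statistic_dd

rng = np.random.default_rng(0)
sample = rng.random((100, 2))
values = rng.random(100)
stat, edges, binnumber = binned_statistic_dd(
    sample, values, statistic='mean', bins=np.int64(5))
print(stat.shape)   # (5, 5)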