def series_range(series):
    """
    Compute the range of a continuously-valued time series.

    Examples:

    .. doctest:: utils

        >>> utils.series_range([0,1,2,3,4,5])
        (5.0, 0.0, 5.0)
        >>> utils.series_range([-0.1, 8.5, 0.02, -6.3])
        (14.8, -6.3, 8.5)

    :param sequence series: the time series
    :returns: the range and the minimum/maximum values
    :rtype: 3-tuple (float, float, float)
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    xs = np.ascontiguousarray(series, dtype=np.float64)
    data = xs.ctypes.data_as(POINTER(c_double))

    # Distinct names so we don't shadow the built-in min/max
    minval, maxval = c_double(), c_double()

    e = ErrorCode(0)
    rng = _inform_range(data, c_ulong(xs.size), byref(minval), byref(maxval),
                        byref(e))
    error_guard(e)

    return rng, minval.value, maxval.value
def coalesce_series(series):
    """
    Coalesce a time series into as few contiguous states as possible.

    The magic of information measures is that the actual values of a time
    series are irrelevant. For example, :math:`\\{0,1,0,1,1\\}` has the same
    entropy as :math:`\\{2,9,2,9,9\\}` (possibly up to a rescaling). This
    gives us the freedom to shift around the values of a time series as long
    as we do not change the relative number of states.

    This function thus provides a way of "compressing" a time series into as
    small a base as possible. For example

    .. doctest:: utils

        >>> utils.coalesce_series([2,9,2,9,9])
        (array([0, 1, 0, 1, 1], dtype=int32), 2)

    Why is this useful? Many of the measures use the base of the time series
    to determine how much memory to allocate; the larger the base, the
    higher the memory usage. It also affects the overall performance as the
    combinatorics climb exponentially with the base.

    The two standard use cases for this function are to reduce the base of a
    time series

    .. doctest:: utils

        >>> utils.coalesce_series([0,2,0,2,0,2])
        (array([0, 1, 0, 1, 0, 1], dtype=int32), 2)

    or to ensure that the states are non-negative

    .. doctest:: utils

        >>> utils.coalesce_series([-8,2,6,-2,4])
        (array([0, 2, 4, 1, 3], dtype=int32), 5)

    Notice that the encoding that is used ensures that the ordering of the
    states stays the same, e.g. :math:`\\{-8 \\rightarrow 0, -2 \\rightarrow 1,
    2 \\rightarrow 2, 4 \\rightarrow 3, 6 \\rightarrow 4\\}`. This isn't
    strictly necessary, so we are going to call it a "feature".

    :param sequence series: the time series to coalesce
    :return: the coalesced time series and its base
    :rtype: the 2-tuple (``numpy.ndarray``, int)
    :raises InformError: if an error occurs in the ``inform`` C call
    """
    xs = np.ascontiguousarray(series, dtype=np.int32)
    data = xs.ctypes.data_as(POINTER(c_int))

    cs = np.empty(xs.shape, dtype=np.int32)
    coal = cs.ctypes.data_as(POINTER(c_int))

    e = ErrorCode(0)
    b = _inform_coalesce(data, c_ulong(xs.size), coal, byref(e))
    error_guard(e)

    return cs, b
def transfer_entropy(source, target, k, b=0, local=False):
    """
    Compute the local or average transfer entropy from one time series to
    another with target history length *k*.

    If the base *b* is not specified (or is 0), then it is inferred from the
    time series with 2 as a minimum. *b* must be at least the base of the
    time series and is used as the base of the logarithm.

    :param source: the source time series
    :type source: sequence or ``numpy.ndarray``
    :param target: the target time series
    :type target: sequence or ``numpy.ndarray``
    :param int k: the history length
    :param int b: the base of the time series and logarithm
    :param bool local: compute the local transfer entropy
    :returns: the average or local transfer entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises ValueError: if either time series has no initial conditions
    :raises ValueError: if either time series is greater than 2-D
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    ys = np.ascontiguousarray(source, np.int32)
    xs = np.ascontiguousarray(target, np.int32)
    if xs.shape != ys.shape:
        raise ValueError("source and target timeseries are different shapes")
    elif xs.ndim == 0:
        raise ValueError("empty timeseries")
    elif xs.ndim > 2:
        raise ValueError("dimension greater than 2")

    if b == 0:
        b = max(2, max(np.amax(xs), np.amax(ys)) + 1)

    ydata = ys.ctypes.data_as(POINTER(c_int))
    xdata = xs.ctypes.data_as(POINTER(c_int))

    if xs.ndim == 1:
        n, m = 1, xs.shape[0]
    else:
        n, m = xs.shape

    e = ErrorCode(0)

    if local is True:
        q = max(0, m - k)
        te = np.empty((n, q), dtype=np.float64)
        out = te.ctypes.data_as(POINTER(c_double))
        _local_transfer_entropy(ydata, xdata, c_ulong(n), c_ulong(m),
                                c_int(b), c_ulong(k), out, byref(e))
    else:
        te = _transfer_entropy(ydata, xdata, c_ulong(n), c_ulong(m),
                               c_int(b), c_ulong(k), byref(e))

    error_guard(e)

    return te
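
# A usage sketch (ours, not part of the library): the binary series below are
# arbitrary, and no output values are asserted since the exact numbers come
# from the underlying `inform` C library. Assumes this definition of
# transfer_entropy is the one in scope.
def _example_transfer_entropy():
    ys = [0, 0, 1, 1, 1, 0, 0, 0, 1]  # source
    xs = [0, 1, 1, 1, 0, 0, 0, 1, 1]  # target
    avg = transfer_entropy(ys, xs, k=2)              # a single float
    loc = transfer_entropy(ys, xs, k=2, local=True)  # shape (1, len(xs) - 2)
    return avg, loc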
def encode(state, b=None):
    """
    Encode a base-*b* array of integers into a single integer.

    This function uses a `big-endian`__ encoding scheme. That is, the most
    significant bits of the encoded integer are determined by the left-most
    end of the unencoded state.

    .. doctest:: utils

        >>> utils.encode([0,0,1], b=2)
        1
        >>> utils.encode([0,1,0], b=3)
        3
        >>> utils.encode([1,0,0], b=4)
        16
        >>> utils.encode([1,0,4], b=5)
        29

    If *b* is not provided (or is None), the base is inferred from the state
    with a minimum value of 2.

    .. doctest:: utils

        >>> utils.encode([0,0,2])
        2
        >>> utils.encode([0,2,0])
        6
        >>> utils.encode([1,2,1])
        16

    See also :py:func:`.decode`.

    .. __: https://en.wikipedia.org/wiki/Endianness#Examples

    :param sequence state: the state to encode
    :param int b: the base in which to encode
    :return: the encoded state
    :rtype: int
    :raises ValueError: if the state is empty
    :raises InformError: if an error occurs in the ``inform`` C call
    """
    xs = np.ascontiguousarray(state, dtype=np.int32)
    if xs.size == 0:
        raise ValueError("cannot encode an empty array")
    data = xs.ctypes.data_as(POINTER(c_int))

    if b is None:
        b = max(2, np.amax(xs) + 1)

    e = ErrorCode(0)
    encoding = _inform_encode(data, c_ulong(xs.size), c_int(b), byref(e))
    error_guard(e)

    return encoding
def conditional_entropy(xs, ys, bx=0, by=0, b=2.0, local=False):
    """
    Compute the (local) conditional entropy between two time series.

    This function expects the **condition** to be the first argument.

    The bases *bx* and *by* are inferred from their respective time series
    if they are not provided (or are 0). The minimum value in both cases
    is 2.

    This function explicitly takes the logarithmic base *b* as an argument.

    :param xs: the time series drawn from the conditional distribution
    :type xs: a sequence or ``numpy.ndarray``
    :param ys: the time series drawn from the target distribution
    :type ys: a sequence or ``numpy.ndarray``
    :param int bx: the base of the conditional time series
    :param int by: the base of the target time series
    :param double b: the logarithmic base
    :param bool local: compute the local conditional entropy
    :return: the local or average conditional entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    us = np.ascontiguousarray(xs, dtype=np.int32)
    vs = np.ascontiguousarray(ys, dtype=np.int32)

    if us.shape != vs.shape:
        raise ValueError("timeseries lengths do not match")

    if bx == 0:
        bx = max(2, np.amax(us) + 1)
    if by == 0:
        by = max(2, np.amax(vs) + 1)

    xdata = us.ctypes.data_as(POINTER(c_int))
    ydata = vs.ctypes.data_as(POINTER(c_int))

    n = us.size

    e = ErrorCode(0)

    if local is True:
        ce = np.empty(us.shape, dtype=np.float64)
        out = ce.ctypes.data_as(POINTER(c_double))
        _local_conditional_entropy(xdata, ydata, c_ulong(n), c_int(bx),
                                   c_int(by), c_double(b), out, byref(e))
    else:
        ce = _conditional_entropy(xdata, ydata, c_ulong(n), c_int(bx),
                                  c_int(by), c_double(b), byref(e))

    error_guard(e)

    return ce
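
# A hedged sketch of the explicit-base call pattern; the data and the helper
# name are ours, and we assume this definition of conditional_entropy is the
# one in scope (a later definition in this document omits the base
# parameters).
def _example_conditional_entropy_bases():
    xs = [0, 0, 1, 1, 2, 1, 1, 0]  # condition
    ys = [0, 0, 1, 1, 1, 1, 0, 0]  # target
    # Let bx/by be inferred from the data, but take logarithms in base 2
    avg = conditional_entropy(xs, ys, b=2.0)
    # Local values: one per time step, returned as a numpy array
    loc = conditional_entropy(xs, ys, b=2.0, local=True)
    return avg, loc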
def entropy_rate(series, k, b=0, local=False):
    """
    Compute the average or local entropy rate of a time series with history
    length *k*.

    If the base *b* is not specified (or is 0), then it is inferred from the
    time series with 2 as a minimum. *b* must be at least the base of the
    time series and is used as the base of the logarithm.

    :param series: the time series
    :type series: sequence or ``numpy.ndarray``
    :param int k: the history length
    :param int b: the base of the time series and logarithm
    :param bool local: compute the local entropy rate
    :returns: the average or local entropy rate
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series has no initial conditions
    :raises ValueError: if the time series is greater than 2-D
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    xs = np.ascontiguousarray(series, np.int32)
    if xs.ndim == 0:
        raise ValueError("empty timeseries")
    elif xs.ndim > 2:
        raise ValueError("dimension greater than 2")

    if b == 0:
        b = max(2, np.amax(xs) + 1)

    data = xs.ctypes.data_as(POINTER(c_int))

    if xs.ndim == 1:
        n, m = 1, xs.shape[0]
    else:
        n, m = xs.shape

    e = ErrorCode(0)

    if local is True:
        q = max(0, m - k)
        er = np.empty((n, q), dtype=np.float64)
        out = er.ctypes.data_as(POINTER(c_double))
        _local_entropy_rate(data, c_ulong(n), c_ulong(m), c_int(b),
                            c_ulong(k), out, byref(e))
    else:
        er = _entropy_rate(data, c_ulong(n), c_ulong(m), c_int(b),
                           c_ulong(k), byref(e))

    error_guard(e)

    return er
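
# Illustrative only (our helper, arbitrary binary data, no asserted values):
# the average rate is a float, while the local rate is a numpy array with one
# value per predicted time step.
def _example_entropy_rate():
    series = [0, 0, 1, 1, 1, 1, 0, 0, 0]
    avg = entropy_rate(series, k=2)              # average rate, a float
    loc = entropy_rate(series, k=2, local=True)  # shape (1, len(series) - 2)
    return avg, loc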
def block_entropy(series, k, b=0, local=False):
    """
    Compute the (local) block entropy of a time series with block size *k*.

    If *b* is 0, then the base is inferred from the time series with a
    minimum value of 2. The base *b* must be at least the base of the time
    series and is used as the base of the logarithm.

    :param series: the time series
    :type series: sequence or ``numpy.ndarray``
    :param int k: the block size
    :param int b: the base of the time series and logarithm
    :param bool local: compute the local block entropy
    :returns: the average or local block entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series has no initial conditions
    :raises ValueError: if the time series is greater than 2-D
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    xs = np.ascontiguousarray(series, np.int32)
    if xs.ndim == 0:
        raise ValueError("empty timeseries")
    elif xs.ndim > 2:
        raise ValueError("dimension greater than 2")

    if b == 0:
        b = max(2, np.amax(xs) + 1)

    data = xs.ctypes.data_as(POINTER(c_int))

    if xs.ndim == 1:
        n, m = 1, xs.shape[0]
    else:
        n, m = xs.shape

    e = ErrorCode(0)

    if local is True:
        q = max(0, m - k + 1)
        be = np.empty((n, q), dtype=np.float64)
        out = be.ctypes.data_as(POINTER(c_double))
        _local_block_entropy(data, c_ulong(n), c_ulong(m), c_int(b),
                             c_ulong(k), out, byref(e))
    else:
        be = _block_entropy(data, c_ulong(n), c_ulong(m), c_int(b),
                            c_ulong(k), byref(e))

    error_guard(e)

    return be
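
# Illustrative sketch (our helper, arbitrary data): note that the local block
# entropy has one value per *block*, so for a 1-D series of length m it has
# shape (1, m - k + 1) rather than (1, m - k).
def _example_block_entropy():
    series = [0, 1, 1, 0, 1, 1, 0, 0]
    avg = block_entropy(series, k=2)
    loc = block_entropy(series, k=2, local=True)  # shape (1, 7) here
    return avg, loc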
def relative_entropy(xs, ys, b=0, base=2.0, local=False):
    """
    Compute the local or global relative entropy between two time series,
    treating each as observations from a distribution.

    The base *b* is inferred from the time series if it is not provided (or
    is 0). The minimum value is 2.

    This function explicitly takes the logarithmic base *base* as an
    argument.

    :param xs: the time series sampled from the posterior distribution
    :type xs: a sequence or ``numpy.ndarray``
    :param ys: the time series sampled from the prior distribution
    :type ys: a sequence or ``numpy.ndarray``
    :param int b: the base of the time series
    :param double base: the logarithmic base
    :param bool local: compute the local relative entropy
    :return: the local or global relative entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    us = np.ascontiguousarray(xs, dtype=np.int32)
    vs = np.ascontiguousarray(ys, dtype=np.int32)

    if us.shape != vs.shape:
        raise ValueError("timeseries lengths do not match")

    if b == 0:
        b = max(2, np.amax(us) + 1, np.amax(vs) + 1)

    xdata = us.ctypes.data_as(POINTER(c_int))
    ydata = vs.ctypes.data_as(POINTER(c_int))

    n = us.size

    e = ErrorCode(0)

    if local is True:
        re = np.empty(b, dtype=np.float64)
        out = re.ctypes.data_as(POINTER(c_double))
        _local_relative_entropy(xdata, ydata, c_ulong(n), c_int(b),
                                c_double(base), out, byref(e))
    else:
        re = _relative_entropy(xdata, ydata, c_ulong(n), c_int(b),
                               c_double(base), byref(e))

    error_guard(e)

    return re
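
# Illustrative sketch (our helper, arbitrary binary samples): unlike most
# local measures in this module, the local relative entropy has one value per
# *state* (an array of length b), not one per time step.
def _example_relative_entropy():
    xs = [0, 1, 0, 0, 1, 0, 0, 1]  # sampled from the posterior
    ys = [0, 1, 1, 1, 1, 0, 0, 1]  # sampled from the prior
    avg = relative_entropy(xs, ys)
    loc = relative_entropy(xs, ys, local=True)  # a length-2 array here
    return avg, loc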
def mutual_info(xs, ys, local=False):
    """
    Compute the (local) mutual information between two time series.

    The base of each time series is inferred from its data, with a minimum
    value of 2.

    :param xs: a time series
    :type xs: a sequence or ``numpy.ndarray``
    :param ys: a time series
    :type ys: a sequence or ``numpy.ndarray``
    :param bool local: compute the local mutual information
    :return: the local or average mutual information
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    us = np.ascontiguousarray(xs, dtype=np.int32)
    vs = np.ascontiguousarray(ys, dtype=np.int32)

    if us.shape != vs.shape:
        raise ValueError("timeseries lengths do not match")

    series = np.ascontiguousarray([us.flatten(), vs.flatten()],
                                  dtype=np.int32)

    bx = max(2, np.amax(us) + 1)
    by = max(2, np.amax(vs) + 1)
    bs = np.ascontiguousarray([bx, by], dtype=np.int32)

    seriesdata = series.ctypes.data_as(POINTER(c_int))
    bsdata = bs.ctypes.data_as(POINTER(c_int))

    l, n = series.shape

    e = ErrorCode(0)

    if local is True:
        mi = np.empty(us.shape, dtype=np.float64)
        out = mi.ctypes.data_as(POINTER(c_double))
        _local_mutual_info(seriesdata, c_ulong(l), c_ulong(n), bsdata, out,
                           byref(e))
    else:
        mi = _mutual_info(seriesdata, c_ulong(l), c_ulong(n), bsdata,
                          byref(e))

    error_guard(e)

    return mi
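
# Illustrative sketch (our helper, arbitrary data, no asserted values): two
# perfectly anti-correlated binary series, which should carry substantial
# mutual information.
def _example_mutual_info():
    xs = [0, 0, 1, 1, 1, 1, 0, 0]
    ys = [1, 1, 0, 0, 0, 0, 1, 1]
    avg = mutual_info(xs, ys)              # a single float
    loc = mutual_info(xs, ys, local=True)  # same shape as xs
    return avg, loc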
def active_info(series, k, local=False):
    """
    Compute the average or local active information of a time series with
    history length *k*.

    :param series: the time series
    :type series: sequence or ``numpy.ndarray``
    :param int k: the history length
    :param bool local: compute the local active information
    :returns: the average or local active information
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series has no initial conditions
    :raises ValueError: if the time series is greater than 2-D
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    xs = np.ascontiguousarray(series, np.int32)
    if xs.ndim == 0:
        raise ValueError("empty timeseries")
    elif xs.ndim > 2:
        raise ValueError("dimension greater than 2")

    b = max(2, np.amax(xs) + 1)

    data = xs.ctypes.data_as(POINTER(c_int))

    if xs.ndim == 1:
        n, m = 1, xs.shape[0]
    else:
        n, m = xs.shape

    e = ErrorCode(0)

    if local is True:
        q = max(0, m - k)
        ai = np.empty((n, q), dtype=np.float64)
        out = ai.ctypes.data_as(POINTER(c_double))
        _local_active_info(data, c_ulong(n), c_ulong(m), c_int(b),
                           c_ulong(k), out, byref(e))
    else:
        ai = _active_info(data, c_ulong(n), c_ulong(m), c_int(b),
                          c_ulong(k), byref(e))

    error_guard(e)

    return ai
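
# Illustrative sketch (our helper, arbitrary data): with a 2-D input each row
# is treated as an initial condition, so the local result has shape
# (rows, columns - k).
def _example_active_info():
    series = [[0, 0, 1, 1, 1, 0, 0],
              [1, 0, 0, 1, 1, 1, 0]]
    avg = active_info(series, k=2)
    loc = active_info(series, k=2, local=True)  # shape (2, 5) here
    return avg, loc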
def conditional_entropy(xs, ys, local=False):
    """
    Compute the (local) conditional entropy between two time series.

    This function expects the **condition** to be the first argument.

    :param xs: the time series drawn from the conditional distribution
    :type xs: a sequence or ``numpy.ndarray``
    :param ys: the time series drawn from the target distribution
    :type ys: a sequence or ``numpy.ndarray``
    :param bool local: compute the local conditional entropy
    :return: the local or average conditional entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    us = np.ascontiguousarray(xs, dtype=np.int32)
    vs = np.ascontiguousarray(ys, dtype=np.int32)

    if us.shape != vs.shape:
        raise ValueError("timeseries lengths do not match")

    bx = max(2, np.amax(us) + 1)
    by = max(2, np.amax(vs) + 1)

    xdata = us.ctypes.data_as(POINTER(c_int))
    ydata = vs.ctypes.data_as(POINTER(c_int))

    n = us.size

    e = ErrorCode(0)

    if local is True:
        ce = np.empty(us.shape, dtype=np.float64)
        out = ce.ctypes.data_as(POINTER(c_double))
        _local_conditional_entropy(xdata, ydata, c_ulong(n), c_int(bx),
                                   c_int(by), out, byref(e))
    else:
        ce = _conditional_entropy(xdata, ydata, c_ulong(n), c_int(bx),
                                  c_int(by), byref(e))

    error_guard(e)

    return ce
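
# Illustrative sketch for the base-inferring variant (our helper, arbitrary
# data); recall that the condition is the first argument.
def _example_conditional_entropy():
    xs = [0, 0, 1, 1, 1, 1, 0, 0]  # condition
    ys = [0, 1, 1, 1, 1, 0, 0, 0]  # target
    avg = conditional_entropy(xs, ys)
    loc = conditional_entropy(xs, ys, local=True)  # same shape as xs
    return avg, loc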
def bin_series(series, b=None, step=None, bounds=None):
    """
    Bin a continuously-valued time series.

    The binning can be performed in any one of three ways.

    .. rubric:: 1. Specified Number of Bins

    The first is binning the time series into *b* uniform bins (with *b* an
    integer).

    .. doctest:: utils

        >>> import numpy as np
        >>> np.random.seed(2019)
        >>> xs = 10 * np.random.rand(20)
        >>> xs
        array([9.03482214, 3.93080507, 6.23969961, 6.37877401, 8.80499069,
               2.99172019, 7.0219827 , 9.03206161, 8.81381926, 4.05749798,
               4.52446621, 2.67070324, 1.6286487 , 8.89214695, 1.48476226,
               9.84723485, 0.32361219, 5.15350754, 2.01129047, 8.86010874])
        >>> utils.bin_series(xs, b=2)
        (array([1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1],
              dtype=int32), 2, 4.761811327822174)
        >>> utils.bin_series(xs, b=3)
        (array([2, 1, 1, 1, 2, 0, 2, 2, 2, 1, 1, 0, 0, 2, 0, 2, 0, 1, 0, 2],
              dtype=int32), 3, 3.1745408852147823)

    With this approach the binned sequence (as a ``numpy.ndarray``), the
    number of bins, and the size of each bin are returned. This binning
    method is useful if, for example, the user wants to bin several time
    series to the same base.

    .. rubric:: 2. Fixed Size Bins

    The second type of binning produces bins of a specific size *step*.

    .. doctest:: utils

        >>> utils.bin_series(xs, step=4.0)
        (array([2, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 0, 0, 2, 0, 2, 0, 1, 0, 2],
              dtype=int32), 3, 4.0)
        >>> utils.bin_series(xs, step=2.0)
        (array([4, 1, 2, 3, 4, 1, 3, 4, 4, 1, 2, 1, 0, 4, 0, 4, 0, 2, 0, 4],
              dtype=int32), 5, 2.0)

    As in the previous case the binned sequence, the number of bins, and the
    size of each bin are returned. This approach is appropriate when the
    system at hand has a particular sensitivity or precision, e.g. if the
    system is sensitive down to 5.0mV changes in potential.

    .. rubric:: 3. Thresholds

    The third type of binning breaks the real number line into segments with
    specified boundaries or thresholds, and the time series is binned
    according to this partitioning. The bounds are expected to be provided
    in ascending order.

    .. doctest:: utils

        >>> utils.bin_series(xs, bounds=[2.0, 7.5])
        (array([2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 0, 2, 0, 2, 0, 1, 1, 2],
              dtype=int32), 3, [2.0, 7.5])
        >>> utils.bin_series(xs, bounds=[2.0])
        (array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1],
              dtype=int32), 2, [2.0])

    Unlike the previous two types of binning, this approach returns the
    specific bounds rather than the bin sizes. The other two returns, the
    binned sequence and the number of bins, are returned as before. This
    approach is useful in situations where the system has natural
    thresholds, e.g. the polarized/hyperpolarized states of a neuron.

    :param sequence series: the continuously-valued time series
    :param int b: the desired number of uniform bins
    :param float step: the desired size of each uniform bin
    :param sequence bounds: the (finite) bounds of each bin
    :return: the binned sequence, the number of bins, and either the bin
        sizes or bin bounds
    :rtype: either (``numpy.ndarray``, int, float) or (``numpy.ndarray``,
        int, sequence)
    :raises ValueError: if no keyword argument is provided
    :raises ValueError: if more than one keyword argument is provided
    :raises InformError: if an error occurs in the ``inform`` C call
    """
    if b is None and step is None and bounds is None:
        raise ValueError(
            "must provide either number of bins, step size, or bin boundaries")
    elif b is not None and step is not None:
        raise ValueError("cannot provide both number of bins and step size")
    elif b is not None and bounds is not None:
        raise ValueError(
            "cannot provide both number of bins and bin boundaries")
    elif step is not None and bounds is not None:
        raise ValueError("cannot provide both step size and bin boundaries")

    xs = np.ascontiguousarray(series, dtype=np.float64)
    data = xs.ctypes.data_as(POINTER(c_double))

    binned = np.empty(xs.shape, dtype=np.int32)
    out = binned.ctypes.data_as(POINTER(c_int))

    e = ErrorCode(0)
    if b is not None:
        spec = _inform_bin(data, c_ulong(xs.size), c_int(b), out, byref(e))
    elif step is not None:
        spec = step
        b = _inform_bin_step(data, c_ulong(xs.size), c_double(step), out,
                             byref(e))
    elif bounds is not None:
        boundaries = np.ascontiguousarray(bounds, dtype=np.float64)
        bnds = boundaries.ctypes.data_as(POINTER(c_double))
        spec = bounds
        b = _inform_bin_bounds(data, c_ulong(xs.size), bnds,
                               c_ulong(boundaries.size), out, byref(e))
    error_guard(e)

    return binned, b, spec
def decode(encoding, b, n=None):
    """
    Decode an integer into a base-*b* array with *n* digits.

    The provided encoded state is decoded using the `big-endian`__ encoding
    scheme.

    .. doctest:: utils

        >>> utils.decode(2, b=2, n=2)
        array([1, 0], dtype=int32)
        >>> utils.decode(6, b=2, n=3)
        array([1, 1, 0], dtype=int32)
        >>> utils.decode(6, b=3, n=2)
        array([2, 0], dtype=int32)

    Note that the base *b* must be provided, but the number of digits *n* is
    optional. If it is provided then the decoded state will have exactly
    that many elements.

    .. doctest:: utils

        >>> utils.decode(2, b=2, n=4)
        array([0, 0, 1, 0], dtype=int32)

    However, if *n* is too small to contain a full representation of the
    state, an error will be raised.

    .. doctest:: utils

        >>> utils.decode(6, b=2, n=2)
        Traceback (most recent call last):
            ...
        pyinform.error.InformError: an inform error occurred - "encoding/decoding failed"

    If *n* is not provided, the length of the decoded state is as small as
    possible:

    .. doctest:: utils

        >>> utils.decode(1, b=2)
        array([1], dtype=int32)
        >>> utils.decode(1, b=3)
        array([1], dtype=int32)
        >>> utils.decode(3, b=2)
        array([1, 1], dtype=int32)
        >>> utils.decode(3, b=3)
        array([1, 0], dtype=int32)
        >>> utils.decode(3, b=4)
        array([3], dtype=int32)

    Of course :py:func:`.encode` and :py:func:`.decode` play well together.

    .. doctest:: utils

        >>> for i in range(100):
        ...     assert(utils.encode(utils.decode(i, b=2)) == i)
        ...
        >>>

    See also :py:func:`.encode`.

    .. __: https://en.wikipedia.org/wiki/Endianness#Examples

    :param int encoding: the encoded state
    :param int b: the desired base
    :param int n: the desired number of digits
    :return: the decoded state
    :rtype: ``numpy.ndarray``
    :raises InformError: if *n* is too small to contain the decoding
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    if n is None:
        state = np.empty(32, dtype=np.int32)
    else:
        state = np.empty(n, dtype=np.int32)
    out = state.ctypes.data_as(POINTER(c_int))

    e = ErrorCode(0)
    _inform_decode(c_int(encoding), c_int(b), out, c_ulong(state.size),
                   byref(e))
    error_guard(e)

    if n is None:
        # Trim leading zeros so the decoded state is as short as possible
        for i in range(32):
            if state[i] != 0:
                break
        state = state[i:]

    return state
def transfer_entropy(source, target, k, condition=None, local=False):
    """
    Compute the local or average transfer entropy from one time series to
    another with target history length *k*. Optionally, time series can be
    provided against which to *condition*.

    :param source: the source time series
    :type source: sequence or ``numpy.ndarray``
    :param target: the target time series
    :type target: sequence or ``numpy.ndarray``
    :param int k: the history length
    :param condition: time series of any conditions
    :type condition: sequence or ``numpy.ndarray``
    :param bool local: compute the local transfer entropy
    :returns: the average or local transfer entropy
    :rtype: float or ``numpy.ndarray``
    :raises ValueError: if the time series have different shapes
    :raises ValueError: if either time series has no initial conditions
    :raises ValueError: if either time series is greater than 2-D
    :raises InformError: if an error occurs within the ``inform`` C call
    """
    ys = np.ascontiguousarray(source, np.int32)
    xs = np.ascontiguousarray(target, np.int32)
    cs = np.ascontiguousarray(condition, np.int32) \
        if condition is not None else None

    if xs.shape != ys.shape:
        raise ValueError("source and target timeseries are different shapes")
    elif xs.ndim == 0:
        raise ValueError("empty timeseries")
    elif xs.ndim > 2:
        raise ValueError(
            "source and target have too great a dimension; must be 2 or less")

    if cs is None:
        pass
    elif cs.ndim == 1 and cs.shape != xs.shape:
        raise ValueError(
            "condition has a shape that's inconsistent with the source and target")
    elif cs.ndim == 2 and xs.ndim == 1 and cs.shape[1:] != xs.shape:
        raise ValueError(
            "condition has a shape that's inconsistent with the source and target")
    elif cs.ndim == 2 and xs.ndim == 2 and cs.shape != xs.shape:
        raise ValueError(
            "condition has a shape that's inconsistent with the source and target")
    elif cs.ndim == 3 and cs.shape[1:] != xs.shape:
        raise ValueError(
            "condition has a shape that's inconsistent with the source and target")
    elif cs.ndim > 3:
        raise ValueError("condition has too great a dimension; must be 3 or less")

    ydata = ys.ctypes.data_as(POINTER(c_int))
    xdata = xs.ctypes.data_as(POINTER(c_int))
    cdata = cs.ctypes.data_as(POINTER(c_int)) if cs is not None else None

    if cs is None:
        b = max(2, max(np.amax(xs), np.amax(ys)) + 1)
    else:
        b = max(2, max(np.amax(xs), np.amax(ys), np.amax(cs)) + 1)

    # Determine the number of background (conditioning) time series
    if cs is None:
        z = 0
    elif cs.ndim == 1 or (cs.ndim == 2 and xs.ndim == 2):
        z = 1
    elif cs.ndim == 3 or (cs.ndim == 2 and xs.ndim == 1):
        z = cs.shape[0]
    else:
        raise RuntimeError(
            "unexpected state: condition and source are inconsistent shapes")

    if xs.ndim == 1:
        n, m = 1, xs.shape[0]
    else:
        n, m = xs.shape

    e = ErrorCode(0)

    if local is True:
        q = max(0, m - k)
        te = np.empty((n, q), dtype=np.float64)
        out = te.ctypes.data_as(POINTER(c_double))
        _local_transfer_entropy(ydata, xdata, cdata, c_ulong(z), c_ulong(n),
                                c_ulong(m), c_int(b), c_ulong(k), out,
                                byref(e))
    else:
        te = _transfer_entropy(ydata, xdata, cdata, c_ulong(z), c_ulong(n),
                               c_ulong(m), c_int(b), c_ulong(k), byref(e))

    error_guard(e)

    return te
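
# Illustrative sketch of conditioning (our helper, arbitrary binary data, no
# asserted values): comparing the unconditioned transfer entropy with the
# value obtained after conditioning on a background process.
def _example_conditional_transfer_entropy():
    ys = [0, 0, 1, 1, 1, 0, 0, 0, 1]  # source
    xs = [0, 1, 1, 1, 0, 0, 0, 1, 1]  # target
    ws = [0, 1, 0, 1, 0, 1, 0, 1, 0]  # background process to condition on
    unconditioned = transfer_entropy(ys, xs, k=2)
    conditioned = transfer_entropy(ys, xs, k=2, condition=ws)
    return unconditioned, conditioned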
def bin_series(series, b=None, step=None, bounds=None):
    """
    Bin a continuously-valued time series.

    The binning can be performed in any one of three ways.

    .. rubric:: 1. Specified Number of Bins

    The first is binning the time series into *b* uniform bins (with *b* an
    integer). ::

        >>> from pyinform import utils
        >>> import numpy as np
        >>> xs = 10 * np.random.rand(20)
        >>> xs
        array([ 6.62004974,  7.24471972,  0.76670198,  2.66306833,  4.32200795,
                8.84902227,  6.83491844,  7.05008074,  3.79287646,  6.50844032,
                8.68804879,  6.79543773,  0.3222078 ,  7.39576325,  7.54150189,
                1.06422897,  1.91958431,  2.34760945,  3.90139184,  3.08885353])
        >>> utils.bin_series(xs, b=2)
        (array([1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0],
              dtype=int32), 2, 4.263407236635026)
        >>> utils.bin_series(xs, b=3)
        (array([2, 2, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0, 0, 1, 0],
              dtype=int32), 3, 2.8422714910900173)

    With this approach the binned sequence (as a ``numpy.ndarray``), the
    number of bins, and the size of each bin are returned. This binning
    method is useful if, for example, the user wants to bin several time
    series to the same base.

    .. rubric:: 2. Fixed Size Bins

    The second type of binning produces bins of a specific size *step*. ::

        >>> utils.bin_series(xs, step=4.0)
        (array([1, 1, 0, 0, 0, 2, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0],
              dtype=int32), 3, 4.0)
        >>> utils.bin_series(xs, step=2.0)
        (array([3, 3, 0, 1, 1, 4, 3, 3, 1, 3, 4, 3, 0, 3, 3, 0, 0, 1, 1, 1],
              dtype=int32), 5, 2.0)

    As in the previous case the binned sequence, the number of bins, and the
    size of each bin are returned. This approach is appropriate when the
    system at hand has a particular sensitivity or precision, e.g. if the
    system is sensitive down to 5.0mV changes in potential.

    .. rubric:: 3. Thresholds

    The third type of binning breaks the real number line into segments with
    specified boundaries or thresholds, and the time series is binned
    according to this partitioning. The bounds are expected to be provided
    in ascending order. ::

        >>> utils.bin_series(xs, bounds=[2.0, 7.5])
        (array([1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 1],
              dtype=int32), 3, [2.0, 7.5])
        >>> utils.bin_series(xs, bounds=[2.0])
        (array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1],
              dtype=int32), 2, [2.0])

    Unlike the previous two types of binning, this approach returns the
    specific bounds rather than the bin sizes. The other two returns, the
    binned sequence and the number of bins, are returned as before. This
    approach is useful in situations where the system has natural
    thresholds, e.g. the polarized/hyperpolarized states of a neuron.

    :param sequence series: the continuously-valued time series
    :param int b: the desired number of uniform bins
    :param float step: the desired size of each uniform bin
    :param sequence bounds: the (finite) bounds of each bin
    :return: the binned sequence, the number of bins, and either the bin
        sizes or bin bounds
    :rtype: either (``numpy.ndarray``, int, float) or (``numpy.ndarray``,
        int, sequence)
    :raises ValueError: if no keyword argument is provided
    :raises ValueError: if more than one keyword argument is provided
    :raises InformError: if an error occurs in the ``inform`` C call
    """
    if b is None and step is None and bounds is None:
        raise ValueError(
            "must provide either number of bins, step size, or bin boundaries")
    elif b is not None and step is not None:
        raise ValueError("cannot provide both number of bins and step size")
    elif b is not None and bounds is not None:
        raise ValueError(
            "cannot provide both number of bins and bin boundaries")
    elif step is not None and bounds is not None:
        raise ValueError("cannot provide both step size and bin boundaries")

    xs = np.ascontiguousarray(series, dtype=np.float64)
    data = xs.ctypes.data_as(POINTER(c_double))

    binned = np.empty(xs.shape, dtype=np.int32)
    out = binned.ctypes.data_as(POINTER(c_int))

    e = ErrorCode(0)
    if b is not None:
        spec = _inform_bin(data, c_ulong(xs.size), c_int(b), out, byref(e))
    elif step is not None:
        spec = step
        b = _inform_bin_step(data, c_ulong(xs.size), c_double(step), out,
                             byref(e))
    elif bounds is not None:
        boundaries = np.ascontiguousarray(bounds, dtype=np.float64)
        bnds = boundaries.ctypes.data_as(POINTER(c_double))
        spec = bounds
        b = _inform_bin_bounds(data, c_ulong(xs.size), bnds,
                               c_ulong(boundaries.size), out, byref(e))
    error_guard(e)

    return binned, b, spec