Example #1
0
    def crossCorr(self, signal, lag=0, var=None):
        """
        Cross correlate time series data against another signal

        Both the signal and every record are mean-subtracted and scaled to
        unit norm before taking their dot product, so each output value is a
        normalized correlation. Records with zero norm after mean subtraction
        yield zeros.

        Parameters
        ----------
        signal : array, or str
            Signal to correlate against, can be a numpy array or a
            MAT file containing the signal as a variable

        lag : int, optional, default = 0
            Range of lags to consider, will cover (-lag, +lag)

        var : str
            Variable name if loading from a MAT file

        Returns
        -------
        A new object built with this object's constructor, whose values hold
        one correlation per lag and whose index is the list of lags
        (or 0 when no lag is requested).

        Raises
        ------
        Exception
            If the signal does not have as many elements as this object's index
        """
        from scipy.linalg import norm

        if isinstance(signal, str):
            s = loadMatVar(signal, var)
        else:
            s = asarray(signal)

        # validate before doing any arithmetic on the signal
        if size(s) != size(self.index):
            raise Exception('Size of signal to cross correlate with, %g, does not match size of series' % size(s))

        # standardize signal
        s = s - mean(s)
        s = s / norm(s)

        # create a matrix with lagged signals, one row per lag
        # (compare by value: identity checks against int literals are unreliable)
        if lag != 0:
            shifts = list(range(-lag, lag + 1))
            d = len(s)
            sShifted = zeros((len(shifts), d))
            for i, shift in enumerate(shifts):
                tmp = roll(s, shift)
                # roll wraps around, so zero out the wrapped-in samples
                if shift < 0:
                    tmp[(d + shift):] = 0
                if shift > 0:
                    tmp[:shift] = 0
                sShifted[i, :] = tmp
            s = sShifted
        else:
            shifts = 0

        def get(y, s):
            # standardize the record the same way as the signal
            y = y - mean(y)
            n = norm(y)
            if n == 0:
                # constant record: correlation is undefined, report zeros
                return zeros((s.shape[0],))
            return dot(s, y / n)

        rdd = self.rdd.mapValues(lambda x: get(x, s))
        return self._constructor(rdd, index=shifts).__finalize__(self)
Example #2
0
    def query(self, inds, var='inds', order='F', isOneBased=True):
        """
        Extract records with indices matching those provided

        Keys are linearized (via subToInd) before being matched against the
        provided indices. For each set of indices, the values of all matching
        records are averaged, and the indices themselves are converted back
        to subscripts and averaged.

        Parameters
        ----------
        inds : str, or array-like (2D)
            Array of indices, each an array-like of integer indices, or
            filename of a MAT file containing a set of indices as a cell array

        var : str, optional, default = 'inds'
            Variable name if loading from a MAT file

        order : str, optional, default = 'F'
            Specify ordering for linearizing indices (see subtoind)

        isOneBased : boolean, optional, default = True
            Specify zero or one based indexing for linearizing (see subtoind)

        Returns
        -------
        keys : array, shape (n, k) where k is the number of key dimensions
            Averaged keys

        values : array, shape (n, d) where d is the length of each value
            Averaged values

        Rows corresponding to empty index sets are left as zeros.
        """
        if isinstance(inds, str):
            inds = loadMatVar(inds, var)[0]
        else:
            inds = asarray(inds)

        n = len(inds)

        from lambdaimage.rdds.keys import _indToSubConverter
        converter = _indToSubConverter(dims=self.dims.max, order=order, isOneBased=isOneBased)

        keys = zeros((n, len(self.dims.count)))
        values = zeros((n, len(self.first()[1])))

        data = self.subToInd(order=order, isOneBased=isOneBased)

        for idx, indList in enumerate(inds):
            if len(indList) > 0:
                # broadcast the index set so each worker can do O(1) membership tests
                indsSet = set(asarray(indList).flat)
                bcInds = self.rdd.context.broadcast(indsSet)
                values[idx, :] = data.filterOnKeys(lambda k: k in bcInds.value).values().mean()
                # build a concrete list: on Python 3, map() returns an iterator
                # that numpy's mean cannot reduce along an axis
                keys[idx, :] = mean([converter(k) for k in indList], axis=0)

        return keys, values
Example #3
0
    def convolve(self, signal, mode='full', var=None):
        """
        Convolve time series data against another signal

        Parameters
        ----------
        signal : array, or str
            Signal to convolve with, can be a numpy array or a
            MAT file containing the signal as a variable

        mode : str, optional, default='full'
            Mode of convolution, options are 'full', 'same', and 'valid'

        var : str
            Variable name if loading from a MAT file

        Returns
        -------
        A new object built with this object's constructor, holding the
        convolved records and an index of 0..L-1, where L is the output
        length expected for the chosen mode.
        """

        from numpy import convolve

        if isinstance(signal, str):
            s = loadMatVar(signal, var)
        else:
            s = asarray(signal)

        n = size(self.index)
        m = size(s)

        # convolve with the loaded/converted array ``s``; the raw ``signal``
        # argument may be a file name rather than the data itself
        newrdd = self.rdd.mapValues(lambda x: convolve(x, s, mode))

        # use expected lengths to make a new index
        if mode == 'same':
            newmax = max(n, m)
        elif mode == 'valid':
            newmax = max(m, n) - min(m, n) + 1
        else:
            newmax = n + m - 1
        newindex = arange(0, newmax)

        return self._constructor(newrdd, index=newindex).__finalize__(self)