def crossCorr(self, signal, lag=0, var=None):
    """
    Cross correlate time series data against another signal

    Parameters
    ----------
    signal : array, or str
        Signal to correlate against, can be a numpy array or a
        MAT file containing the signal as a variable

    lag : int
        Range of lags to consider, will cover (-lag, +lag)

    var : str
        Variable name if loading from a MAT file
    """
    from scipy.linalg import norm

    if isinstance(signal, str):
        s = loadMatVar(signal, var)
    else:
        s = asarray(signal)

    # standardize signal
    s = s - mean(s)
    s = s / norm(s)

    if size(s) != size(self.index):
        raise Exception('Size of signal to cross correlate with, %g, does not match size of series' % size(s))

    # create a matrix with lagged copies of the signal
    if lag != 0:
        shifts = range(-lag, lag+1)
        d = len(s)
        m = len(shifts)
        sShifted = zeros((m, d))
        for i in range(len(shifts)):
            tmp = roll(s, shifts[i])
            if shifts[i] < 0:
                # zero pad the wrapped-around tail
                tmp[(d+shifts[i]):] = 0
            if shifts[i] > 0:
                # zero pad the wrapped-around head
                tmp[:shifts[i]] = 0
            sShifted[i, :] = tmp
        s = sShifted
    else:
        shifts = [0]

    def get(y, s):
        y = y - mean(y)
        n = norm(y)
        if n == 0:
            b = zeros((s.shape[0],))
        else:
            y /= n
            b = dot(s, y)
        return b

    rdd = self.rdd.mapValues(lambda x: get(x, s))

    return self._constructor(rdd, index=shifts).__finalize__(self)
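# Usage sketch for crossCorr (illustrative only, not part of the class;
# assumes `series` is an instance of this class and `sig` is a 1d numpy
# array whose length matches `series.index`):
#
#     shifted = series.crossCorr(sig, lag=2)
#
# Each record of `shifted` then holds 2*lag + 1 correlation values, one per
# shift in (-lag, ..., +lag), and `shifted.index` is set to those shifts.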
def query(self, inds, var='inds', order='F', isOneBased=True):
    """
    Extract records with indices matching those provided

    Keys will be automatically linearized before matching to provided indices.
    This will not affect the keys that are returned.

    Parameters
    ----------
    inds : str, or array-like (2D)
        Array of indices, each an array-like of integer indices, or
        filename of a MAT file containing a set of indices as a cell array

    var : str, optional, default = 'inds'
        Variable name if loading from a MAT file

    order : str, optional, default = 'F'
        Specify ordering for linearizing indices (see subToInd)

    isOneBased : boolean, optional, default = True
        Specify zero or one based indexing for linearizing (see subToInd)

    Returns
    -------
    keys : array, shape (n, k) where k is the number of key dimensions
        Averaged keys

    values : array, shape (n, d) where d is the length of each record
        Averaged values
    """
    if isinstance(inds, str):
        inds = loadMatVar(inds, var)[0]
    else:
        inds = asarray(inds)

    n = len(inds)

    from lambdaimage.rdds.keys import _indToSubConverter
    converter = _indToSubConverter(dims=self.dims.max, order=order, isOneBased=isOneBased)

    keys = zeros((n, len(self.dims.count)))
    values = zeros((n, len(self.first()[1])))

    data = self.subToInd(order=order, isOneBased=isOneBased)

    for idx, indList in enumerate(inds):
        if len(indList) > 0:
            indsSet = set(asarray(indList).flat)
            bcInds = self.rdd.context.broadcast(indsSet)
            values[idx, :] = data.filterOnKeys(lambda k: k in bcInds.value).values().mean()
            keys[idx, :] = mean([converter(k) for k in indList], axis=0)

    return keys, values
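# Usage sketch for query (illustrative only, not part of the class; assumes
# `series` is an instance of this class and the inner lists hold valid
# linearized key positions):
#
#     keys, values = series.query([[1, 2, 3], [4, 5]])
#
# `keys[i]` is the mean subscript key and `values[i]` the mean record over
# the i-th group of indices.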
def convolve(self, signal, mode='full', var=None):
    """
    Convolve time series data against another signal

    Parameters
    ----------
    signal : array, or str
        Signal to convolve with, can be a numpy array or a
        MAT file containing the signal as a variable

    var : str
        Variable name if loading from a MAT file

    mode : str, optional, default='full'
        Mode of convolution, options are 'full', 'same', and 'valid'
    """
    from numpy import convolve

    if isinstance(signal, str):
        s = loadMatVar(signal, var)
    else:
        s = asarray(signal)

    n = size(self.index)
    m = size(s)

    newrdd = self.rdd.mapValues(lambda x: convolve(x, s, mode))

    # use expected lengths to make a new index
    if mode == 'same':
        newmax = max(n, m)
    elif mode == 'valid':
        newmax = max(m, n) - min(m, n) + 1
    else:
        newmax = n + m - 1
    newindex = arange(0, newmax)

    return self._constructor(newrdd, index=newindex).__finalize__(self)
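# Usage sketch for convolve (illustrative only, not part of the class; assumes
# `series` is an instance of this class with records of length n and `kernel`
# is a 1d numpy array of length m):
#
#     smoothed = series.convolve(kernel, mode='same')
#
# With mode='same' each record keeps length max(n, m); 'full' gives n + m - 1
# and 'valid' gives max(n, m) - min(n, m) + 1, matching the new index above.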