def test_crosscorr(self):
    data_local = array([
        array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
        array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1]),
    ])
    sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])
    data = self.sc.parallelize(zip(range(1, 3), data_local))

    method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=0)
    betas = method.calc(data).map(lambda (_, v): v)
    assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
    assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))

    method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=2)
    betas = method.calc(data).map(lambda (_, v): v)
    tol = 1E-5  # to handle rounding errors
    assert(allclose(betas.collect()[0], array([-0.18511, 0.03817, 0.99221, 0.06567, -0.25750]), atol=tol))
    assert(allclose(betas.collect()[1], array([-0.35119, -0.14190, 0.44777, -0.00408, 0.45435]), atol=tol))

    betas = crosscorr(data, sig, 0).map(lambda (_, v): v)
    assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
    assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))

def crosscorr(data, sigfile, lag):
    """Cross-correlate data points (typically time series data)
    against a signal over the specified lags

    :param data: RDD of data points as key value pairs
    :param sigfile: signal to correlate with (string with file location or array)
    :param lag: maximum lag (result will be length 2*lag + 1)

    :return betas: cross-correlations at different time lags
    :return scores: scores from PCA (if lag > 0)
    :return latent: latent values from PCA (if lag > 0)
    :return comps: components from PCA (if lag > 0)
    """
    # compute cross correlations
    method = SigProcessingMethod.load("crosscorr", sigfile=sigfile, lag=lag)
    betas = method.calc(data)

    if lag != 0:
        # do PCA
        scores, latent, comps = svd(betas, 2)
        return betas, scores, latent, comps
    else:
        return betas

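For reference, a minimal usage sketch for this version, assuming a live SparkContext named `sc`; the toy arrays mirror those in test_crosscorr and are illustrative only.

from numpy import array

series = [array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
          array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1])]
sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])
data = sc.parallelize([(1, series[0]), (2, series[1])])

# with lag > 0 the PCA step runs, so four values come back
betas, scores, latent, comps = crosscorr(data, sig, 2)

# with lag == 0 only the cross-correlations are returned
betas = crosscorr(data, sig, 0)
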
def crosscorr(data, sigfile, lag):
    """cross-correlate data points (typically time series data)
    against a signal over the specified lags

    arguments:
    data - RDD of data points
    sigfile - signal to correlate with (string with file location or array)
    lag - maximum lag (result will be 2*lag + 1)

    returns:
    betas - cross-correlations at different time lags
    scores, latent, comps - result of applying pca if lag > 0
    """
    # compute cross correlations
    method = SigProcessingMethod.load("crosscorr", sigfile=sigfile, lag=lag)
    betas = method.calc(data)

    if lag != 0:
        # do PCA
        scores, latent, comps = svd(betas, 2)
        return betas, scores, latent, comps
    else:
        return betas

def stats(data, statistic):
    """compute summary statistics on every data point

    arguments:
    data - RDD of data points
    statistic - which statistic to compute ("median", "mean", "std", "norm")

    returns:
    vals - RDD of statistics
    """
    method = SigProcessingMethod.load("stats", statistic=statistic)
    vals = method.calc(data)

    return vals

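As a rough usage sketch, assuming a SparkContext `sc`; the records here are a plain RDD of arrays (matching the older test below) and the values are made up.

from numpy import array

data = sc.parallelize([array([1.0, 2.0, 3.0]), array([4.0, 5.0, 6.0])])
medians = stats(data, "median")   # one summary value per data point
print(medians.collect())
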
def test_crosscorr(self):
    data_local = array([
        array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
        array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1]),
    ])
    sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])
    data = self.sc.parallelize(data_local)

    method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=0)
    betas = method.calc(data)
    assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
    assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))

    method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=2)
    betas = method.calc(data)
    tol = 1E-5  # to handle rounding errors
    assert(allclose(betas.collect()[0], array([-0.18511, 0.03817, 0.99221, 0.06567, -0.25750]), atol=tol))
    assert(allclose(betas.collect()[1], array([-0.35119, -0.14190, 0.44777, -0.00408, 0.45435]), atol=tol))

    betas = crosscorr(data, sig, 0)
    assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
    assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))

def fourier(data, freq):
    """Compute fourier transform of data points (typically time series data)

    :param data: RDD of data points as key value pairs
    :param freq: frequency (number of cycles)

    :return co: RDD of coherence (normalized amplitude)
    :return ph: RDD of phase
    """
    method = SigProcessingMethod.load("fourier", freq=freq)

    out = method.calc(data)

    co = out.mapValues(lambda x: x[0])
    ph = out.mapValues(lambda x: x[1])

    return co, ph

def fourier(data, freq):
    """compute fourier transform of data points (typically time series data)

    arguments:
    data - RDD of data points
    freq - frequency (number of cycles - 1)

    returns:
    co - RDD of coherence (normalized amplitude)
    ph - RDD of phase
    """
    method = SigProcessingMethod.load("fourier", freq=freq)

    out = method.calc(data).cache()

    co = out.map(lambda x: x[0])
    ph = out.map(lambda x: x[1])

    return co, ph

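A short sketch of calling fourier, assuming a SparkContext `sc`; the keyed record layout follows the newer version above, and the sine series and freq value are illustrative.

from numpy import sin, linspace, pi

series = [sin(linspace(0, 2 * pi, 12, endpoint=False)),
          sin(linspace(0, 4 * pi, 12, endpoint=False))]
data = sc.parallelize([(1, series[0]), (2, series[1])])

co, ph = fourier(data, 1)
print(co.collect())   # coherence (normalized amplitude) per record
print(ph.collect())   # phase per record
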
def query(data, indsfile):
    """Query data by averaging together data points with the given indices

    :param data: RDD of data points as key value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # convert to linear indexing
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        ts[i, :] = data.filter(lambda (k, _): k in method.inds[i]).map(
            lambda (k, x): x).mean()

    return ts

def query(data, indsfile):
    """query data by averaging together data points with the given indices

    arguments:
    data - RDD of data points (pairs of (int,array))
    indsfile - indices to average over (string with file location or array)

    returns:
    ts - array with averages
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        ts[i, :] = data.filter(lambda (k, x): k in method.inds[i]).map(
            lambda (k, x): x).mean()

    return ts

def query(data, indsfile):
    """Query data by averaging together data points with the given indices

    :param data: RDD of data points as key value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # convert to linear indexing
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        # broadcast the current index set so each task reads it from the
        # broadcast variable instead of capturing the whole method object
        indsb = data.context.broadcast(method.inds[i])
        ts[i, :] = data.filter(lambda (k, _): k in indsb.value).map(
            lambda (k, x): x).mean()

    return ts

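A hedged sketch of calling query with an in-memory index array instead of a file; the SparkContext `sc`, the keys, and the index groups are illustrative, the exact index-array format accepted by the loader is an assumption, and whether keys must already be linear indices depends on which of the versions above is in use.

from numpy import array

records = [(1, array([1.0, 2.0, 3.0])),
           (2, array([2.0, 4.0, 6.0])),
           (3, array([0.0, 1.0, 0.0])),
           (4, array([4.0, 4.0, 4.0]))]
data = sc.parallelize(records)

# two groups of keys to average over
inds = array([[1, 2], [3, 4]])

ts = query(data, inds)   # ts[i, :] is the mean series for group i
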
def __init__(self, sc):
    ThunderDataTest.__init__(self, sc)
    self.method = SigProcessingMethod.load("stats", statistic="std")

def __init__(self, sc):
    ThunderDataTest.__init__(self, sc)
    self.method = SigProcessingMethod.load("fourier", freq=5)

def runtest(self):
    method = SigProcessingMethod.load("crosscorr", sigfile=os.path.join(self.modelfile, "crosscorr"), lag=0)
    betas = method.calc(self.rdd)
    betas.count()