Example #1
    def test_crosscorr(self):
        data_local = array([
            array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
            array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1]),
        ])

        sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])

        data = self.sc.parallelize(list(zip(range(1, 3), data_local)))

        method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=0)
        betas = method.calc(data).map(lambda kv: kv[1])
        assert (allclose(betas.collect()[0],
                         corrcoef(data_local[0, :], sig)[0, 1]))
        assert (allclose(betas.collect()[1],
                         corrcoef(data_local[1, :], sig)[0, 1]))

        method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=2)
        betas = method.calc(data).map(lambda kv: kv[1])
        tol = 1E-5  # to handle rounding errors
        assert (allclose(betas.collect()[0],
                         array([-0.18511, 0.03817, 0.99221, 0.06567,
                                -0.25750]),
                         atol=tol))
        assert (allclose(betas.collect()[1],
                         array(
                             [-0.35119, -0.14190, 0.44777, -0.00408, 0.45435]),
                         atol=tol))

        betas = crosscorr(data, sig, 0).map(lambda kv: kv[1])
        assert (allclose(betas.collect()[0],
                         corrcoef(data_local[0, :], sig)[0, 1]))
        assert (allclose(betas.collect()[1],
                         corrcoef(data_local[1, :], sig)[0, 1]))
Example #2
def crosscorr(data, sigfile, lag):
    """Cross-correlate data points
    (typically time series data)
    against a signal over the specified lags

    :param data: RDD of data points as key-value pairs
    :param sigfile: signal to correlate with (string with file location or array)
    :param lag: maximum lag (result will be length 2*lag + 1)

    :return betas: cross-correlations at different time lags
    :return scores: scores from PCA (if lag > 0)
    :return latent: latent values from PCA (if lag > 0)
    :return comps: components from PCA (if lag > 0)
    """

    # compute cross correlations
    method = SigProcessingMethod.load("crosscorr", sigfile=sigfile, lag=lag)
    betas = method.calc(data)

    if lag != 0:
        # do PCA
        scores, latent, comps = svd(betas, 2)
        return betas, scores, latent, comps
    else:
        return betas
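
A minimal usage sketch of this function, reusing the keyed test data from Example #1; it assumes an active SparkContext sc and that crosscorr and its svd dependency are importable, so treat the names as illustrative.

    from numpy import array

    data_local = [array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
                  array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1])]
    sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])
    data = sc.parallelize(list(zip(range(1, 3), data_local)))

    betas = crosscorr(data, sig, 0)                         # lag 0: correlations only
    betas, scores, latent, comps = crosscorr(data, sig, 2)  # lag > 0: PCA outputs too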
Example #3
def crosscorr(data, sigfile, lag):
    """cross-correlate data points
    (typically time series data)
    against a signal over the specified lags

    arguments:
    data - RDD of data points
    sigfile - signal to correlate with (string with file location or array)
    lag - maximum lag (result will be 2*lag + 1)

    returns:
    betas - cross-correlations at different time lags
    scores, latent, comps - result of applying pca if lag > 0
    """

    # compute cross correlations
    method = SigProcessingMethod.load("crosscorr", sigfile=sigfile, lag=lag)
    betas = method.calc(data)

    if lag != 0:
        # do PCA
        scores, latent, comps = svd(betas, 2)
        return betas, scores, latent, comps
    else:
        return betas
Example #4
def crosscorr(data, sigfile, lag):
    """Cross-correlate data points
    (typically time series data)
    against a signal over the specified lags

    :param data: RDD of data points as key-value pairs
    :param sigfile: signal to correlate with (string with file location or array)
    :param lag: maximum lag (result will be length 2*lag + 1)

    :return betas: cross-correlations at different time lags
    :return scores: scores from PCA (if lag > 0)
    :return latent: latent values from PCA (if lag > 0)
    :return comps: components from PCA (if lag > 0)
    """

    # compute cross correlations
    method = SigProcessingMethod.load("crosscorr", sigfile=sigfile, lag=lag)
    betas = method.calc(data)

    if lag != 0:
        # do PCA
        scores, latent, comps = svd(betas, 2)
        return betas, scores, latent, comps
    else:
        return betas
Example #5
File: stats.py  Project: errord/thunder
def stats(data, statistic):
    """compute summary statistics on every data point

    arguments:
    data - RDD of data points
    statistic - which statistic to compute ("median", "mean", "std", "norm")

    returns:
    vals - RDD of statistics
    """

    method = SigProcessingMethod.load("stats", statistic=statistic)
    vals = method.calc(data)

    return vals
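
A minimal usage sketch, assuming an active SparkContext sc; the statistic names come from the docstring above, and the data is illustrative.

    from numpy import array

    data = sc.parallelize([array([1.0, 2.0, 3.0]), array([4.0, 5.0, 6.0])])
    means = stats(data, "mean")   # one summary value per data point
    print(means.collect())        # e.g. [2.0, 5.0]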
Example #6
    def test_crosscorr(self):
        data_local = array([
            array([1.0, 2.0, -4.0, 5.0, 8.0, 3.0, 4.1, 0.9, 2.3]),
            array([2.0, 2.0, -4.0, 5.0, 3.1, 4.5, 8.2, 8.1, 9.1]),
        ])

        sig = array([1.5, 2.1, -4.2, 5.6, 8.1, 3.9, 4.2, 0.3, 2.1])

        data = self.sc.parallelize(data_local)

        method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=0)
        betas = method.calc(data)
        assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
        assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))

        method = SigProcessingMethod.load("crosscorr", sigfile=sig, lag=2)
        betas = method.calc(data)
        tol = 1E-5  # to handle rounding errors
        assert(allclose(betas.collect()[0], array([-0.18511, 0.03817, 0.99221, 0.06567, -0.25750]), atol=tol))
        assert(allclose(betas.collect()[1], array([-0.35119, -0.14190, 0.44777, -0.00408, 0.45435]), atol=tol))

        betas = crosscorr(data, sig, 0)
        assert(allclose(betas.collect()[0], corrcoef(data_local[0, :], sig)[0, 1]))
        assert(allclose(betas.collect()[1], corrcoef(data_local[1, :], sig)[0, 1]))
Example #7
def stats(data, statistic):
    """compute summary statistics on every data point

    arguments:
    data - RDD of data points
    statistic - which statistic to compute ("median", "mean", "std", "norm")

    returns:
    vals - RDD of statistics
    """

    method = SigProcessingMethod.load("stats", statistic=statistic)
    vals = method.calc(data)

    return vals
Example #8
def fourier(data, freq):
    """Compute fourier transform of data points
    (typically time series data)

    :param data: RDD of data points as key-value pairs
    :param freq: frequency (number of cycles)

    :return co: RDD of coherence (normalized amplitude)
    :return ph: RDD of phase
    """

    method = SigProcessingMethod.load("fourier", freq=freq)
    out = method.calc(data)

    co = out.mapValues(lambda x: x[0])
    ph = out.mapValues(lambda x: x[1])

    return co, ph
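
A minimal usage sketch, assuming an active SparkContext sc and keyed records (to match the mapValues calls above); the synthetic sine data is illustrative only.

    from numpy import sin, pi, linspace

    t = linspace(0, 2 * pi, 64, endpoint=False)
    data = sc.parallelize([(1, sin(2 * t)), (2, sin(3 * t))])  # 2 and 3 cycles

    co, ph = fourier(data, freq=2)  # coherence and phase at 2 cycles
    co.collect()                    # key 1 should show coherence near 1 here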
Example #9
File: fourier.py  Project: errord/thunder
def fourier(data, freq):
    """compute fourier transform of data points
    (typically time series data)

    arguments:
    data - RDD of data points
    freq - frequency (number of cycles - 1)

    returns:
    co - RDD of coherence (normalized amplitude)
    ph - RDD of phase
    """

    method = SigProcessingMethod.load("fourier", freq=freq)
    out = method.calc(data).cache()

    co = out.map(lambda x: x[0])
    ph = out.map(lambda x: x[1])

    return co, ph
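
Note that this variant caches out before splitting it: the RDD is consumed twice, once for co and once for ph, and the cache avoids recomputing the Fourier transform on the second pass.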
Example #10
def query(data, indsfile):
    """Query data by averaging together
    data points with the given indices

    :param data: RDD of data points as key-value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # convert to linear indexing
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        ts[i, :] = data.filter(lambda kv: kv[0] in method.inds[i]).map(
            lambda kv: kv[1]).mean()

    return ts
Example #11
File: query.py  Project: errord/thunder
def query(data, indsfile):
    """query data by averaging together
    data points with the given indices

    arguments:
    data - RDD of data points (pairs of (int,array))
    indsfile - string with file location or array of indices

    returns:
    ts - array with averages, one row per set of indices
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        ts[i, :] = data.filter(lambda kv: kv[0] in method.inds[i]).map(
            lambda kv: kv[1]).mean()

    return ts
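
A rough usage sketch against this variant, which takes (int, array) pairs directly. It assumes, based on the loop over method.inds, that indsfile can be passed as an array of index arrays; the keys and groups here are hypothetical.

    from numpy import array

    # hypothetical keyed data: integer indices with short series
    data = sc.parallelize([(1, array([1.0, 2.0])), (2, array([3.0, 4.0])),
                           (3, array([5.0, 6.0]))])

    # hypothetical index sets: average keys 1 and 2 together, key 3 alone
    inds = array([array([1, 2]), array([3])], dtype=object)
    ts = query(data, inds)   # one averaged series per index set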
Example #12
def query(data, indsfile):
    """Query data by averaging together
    data points with the given indices

    :param data: RDD of data points as key-value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # convert to linear indexing
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(0, method.n):
        indsb = data.context.broadcast(method.inds[i])
        ts[i, :] = data.filter(lambda kv: kv[0] in indsb.value).map(
            lambda kv: kv[1]).mean()

    return ts
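
Compared with the earlier query variants, this version broadcasts each index set via data.context.broadcast, so the filter closes over the small indsb handle rather than the whole method object, and the indices are shipped to each executor once per set instead of with every task.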
Example #13
    def __init__(self, sc):
        ThunderDataTest.__init__(self, sc)
        self.method = SigProcessingMethod.load("stats", statistic="std")
Example #14
    def __init__(self, sc):
        ThunderDataTest.__init__(self, sc)
        self.method = SigProcessingMethod.load("fourier", freq=5)
Example #15
    def runtest(self):
        method = SigProcessingMethod.load("crosscorr", sigfile=os.path.join(self.modelfile, "crosscorr"), lag=0)
        betas = method.calc(self.rdd)
        betas.count()