Esempio n. 1
0
def test_tsb_not_one_dimensional():
    """mpdist must raise a ValueError when ``ts_b`` is two dimensional."""
    tsb = np.array([[1, 1], [2, 2]])
    ts = np.arange(10)
    w = 32

    with pytest.raises(ValueError) as excinfo:
        mpdist(ts, tsb, w)

    # The message check lives after the ``with`` block on purpose: once the
    # call above raises, nothing else inside the block is executed, so an
    # assert placed there would silently never run.
    assert 'ts_b must be one dimensional!' == str(excinfo.value)
Esempio n. 2
0
def test_against_matlab_parallel():
    """
    Compare mpdist, called with n_jobs=-1, against a stored reference value
    (per the test name, one obtained from MATLAB).
    """
    sample_path = os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')
    series = np.loadtxt(sample_path)
    window = 32

    # the second series is a slice of the first one
    result = mpdist(series, series[199:300], window, n_jobs=-1)

    np.testing.assert_almost_equal(result, 1.460009659995543e-07)
Esempio n. 3
0
def test_small_series_multi_threaded():
    """Check mpdist on a short series/query pair when called with n_jobs=-1."""
    series = np.array(
        [1, 2, 3, 1, 2, 3, 4, 5, 6, 0, 0, 1, 1, 2, 2, 4, 5, 1, 1, 9]
    ).astype('d')
    subsequence = np.array(
        [
            0.23595094, 0.9865171, 0.1934413, 0.60880883,
            0.55174926, 0.77139988, 0.33529215, 0.63215848,
        ]
    ).astype('d')
    window = 4
    expected = 0.437690617625298

    result = mpdist(series, subsequence, window, n_jobs=-1)

    np.testing.assert_almost_equal(result, expected)
def compute_dist(args):
    """
    Evaluate one MPDist call from a packed argument tuple so that the work
    can be handed to a pool's ``map``.

    Parameters
    ----------
    args : tuple
        Positional pack read by index: ``args[0]`` is the slot index that is
        echoed back, ``args[1]`` and ``args[2]`` are the two series,
        ``args[3]`` is the window size and ``args[4]`` is the threshold.

    Returns
    -------
    values : tuple
        ``(k, distance)`` - the slot index that was passed in and the
        distance computed by mpdist.
    """
    slot = args[0]
    first, second, window, cutoff = args[1], args[2], args[3], args[4]

    return (slot, mpdist(first, second, window, threshold=cutoff))
Esempio n. 5
0
def test_invalid_threshold():
    """
    mpdist must raise a ValueError with the expected message for each
    invalid threshold exercised here: a negative number, a string and the
    integer 1.
    """
    ts = np.arange(100)
    tsb = np.arange(100)
    w = 32
    error = 'threshold must be a float greater than 0 and less than 1'

    for threshold in (-1, 'str', 1):
        with pytest.raises(ValueError) as excinfo:
            mpdist(ts, tsb, w, threshold=threshold)

        # Asserted outside the ``with`` block: code placed after the raising
        # call inside the block is never reached, which would leave the
        # message unchecked.
        assert error == str(excinfo.value)
def pairwise_dist(X, window_size, threshold=0.05, n_jobs=1):
    """
    Compute the MPDist between every pair of time series in ``X`` and return
    the distances in condensed form.

    Note
    ----
    scipy.spatial.distance.pdist is not used here because it does not accept
    jagged arrays; its implementation served as a reference for this
    function.
    https://github.com/scipy/scipy/blob/master/scipy/spatial/distance.py#L2039

    Parameters
    ----------
    X : array_like
        An array_like object containing the time series to compare.
    window_size : int
        The window size to use in computing the MPDist.
    threshold : float
        The threshold used to compute MPDist.
    n_jobs : int
        Number of CPU cores to use during computation. A value of 1 runs the
        pairs one after another in this process; any other value dispatches
        them through a pool created with ``n_jobs``.

    Returns
    -------
    Y : np.ndarray
        A condensed distance matrix of length ``m * (m - 1) / 2``, where m is
        the number of series. Pairs ``(i, j)`` with ``i < j < m`` are stored
        in row-major order, each entry holding ``dist(u=X[i], v=X[j])``.

    Raises
    ------
    ValueError
        If ``X`` is not array_like.
    """
    if not core.is_array_like(X):
        raise ValueError('X must be array_like!')

    # number of series: first axis of a 2D ndarray, otherwise the length
    is_matrix = isinstance(X, np.ndarray) and len(X.shape) == 2
    n_series = X.shape[0] if is_matrix else len(X)

    condensed = np.empty((n_series * (n_series - 1)) // 2, dtype=np.double)

    # every index pair (i, j) with i < j; the position of a pair in this list
    # is its slot in the condensed output
    pairs = [
        (i, j)
        for i in range(n_series - 1)
        for j in range(i + 1, n_series)
    ]

    if n_jobs == 1:
        for slot, (i, j) in enumerate(pairs):
            condensed[slot] = mpdist(
                X[i], X[j], window_size, threshold=threshold, n_jobs=n_jobs
            )
        return condensed

    # each task carries its slot so results can be placed by index
    tasks = [
        (slot, X[i], X[j], window_size, threshold)
        for slot, (i, j) in enumerate(pairs)
    ]

    with core.mp_pool()(n_jobs) as pool:
        outcomes = pool.map(compute_dist, tasks)

    for slot, distance in outcomes:
        condensed[slot] = distance

    return condensed