def test_tsb_not_one_dimensional(): tsb = np.array([[1, 1], [2, 2]]) ts = np.arange(10) w = 32 with pytest.raises(ValueError) as excinfo: mpdist(ts, tsb, w) assert ('ts_b must be one dimensional!' == str(excinfo.value))
def test_against_matlab_parallel(): ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) tsb = ts[199:300] w = 32 desired = 1.460009659995543e-07 actual = mpdist(ts, tsb, w, n_jobs=-1) np.testing.assert_almost_equal(actual, desired)
def test_small_series_multi_threaded(): ts = np.array([1, 2, 3, 1, 2, 3, 4, 5, 6, 0, 0, 1, 1, 2, 2, 4, 5, 1, 1, 9]).astype('d') query = np.array([ 0.23595094, 0.9865171, 0.1934413, 0.60880883, 0.55174926, 0.77139988, 0.33529215, 0.63215848 ]).astype('d') w = 4 desired = 0.437690617625298 actual = mpdist(ts, query, w, n_jobs=-1) np.testing.assert_almost_equal(actual, desired)
def compute_dist(args): """ Helper function to parallelize pairwise distance calculation. Parameters ---------- args : tuple The arguments to pass to the mpdist calculation. Returns ------- values : tuple The kth index and distance. """ k = args[0] distance = mpdist(args[1], args[2], args[3], threshold=args[4]) return (k, distance)
def test_invalid_threshold(): ts = np.arange(100) tsb = np.arange(100) w = 32 threshold = -1 error = 'threshold must be a float greater than 0 and less than 1' with pytest.raises(ValueError) as excinfo: mpdist(ts, tsb, w, threshold=threshold) assert (error == str(excinfo.value)) threshold = 'str' with pytest.raises(ValueError) as excinfo: mpdist(ts, tsb, w, threshold=threshold) assert (error == str(excinfo.value)) threshold = 1 with pytest.raises(ValueError) as excinfo: mpdist(ts, tsb, w, threshold=threshold) assert (error == str(excinfo.value))
def pairwise_dist(X, window_size, threshold=0.05, n_jobs=1): """ Utility function to compute all pairwise distances between the timeseries using MPDist. Note ---- scipy.spatial.distance.pdist cannot be used because they do not allow for jagged arrays, however their code was used as a reference in creating this function. https://github.com/scipy/scipy/blob/master/scipy/spatial/distance.py#L2039 Parameters ---------- X : array_like An array_like object containing time series to compute distances for. window_size : int The window size to use in computing the MPDist. threshold : float The threshold used to compute MPDist. n_jobs : int Number of CPU cores to use during computation. Returns ------- Y : np.ndarray Returns a condensed distance matrix Y. For each :math:`i` and :math:`j` (where :math:`i<j<m`),where m is the number of original observations. The metric ``dist(u=X[i], v=X[j])`` is computed and stored in entry ``ij``. """ if not core.is_array_like(X): raise ValueError('X must be array_like!') # identify shape based on iterable or np.ndarray.shape m = 0 if isinstance(X, np.ndarray) and len(X.shape) == 2: m = X.shape[0] else: m = len(X) dm = np.empty((m * (m - 1)) // 2, dtype=np.double) k = 0 if n_jobs == 1: for i in range(0, m - 1): for j in range(i + 1, m): dm[k] = mpdist(X[i], X[j], window_size, threshold=threshold, n_jobs=n_jobs) k = k + 1 else: args = [] for i in range(0, m - 1): for j in range(i + 1, m): args.append((k, X[i], X[j], window_size, threshold)) k = k + 1 with core.mp_pool()(n_jobs) as pool: results = pool.map(compute_dist, args) # put results in the matrix for result in results: dm[result[0]] = result[1] return dm