def _neighborSims1D(seq, length, numNeighbors=100, maxDist=.25):
    """
    Computes the similarity of randomly sampled "neighbor" subsequences of
    `seq` to every subsequence of `seq`.

    Neighbors are sampled without replacement, weighted by the scores from
    `windowScoresRandWalk`. If those scores cannot be normalized into
    probabilities (they sum to zero, as happens when `length` < 4, or the
    sum is not finite), neighbors are sampled uniformly instead.

    Parameters
    ----------
    seq : array
        The time series; it is flattened to 1D before use.
    length : int
        The length of the subsequences.
    numNeighbors : int, optional
        How many subsequences to sample as neighbors. Values < 1 or greater
        than the number of subsequences mean "use as many neighbors as
        there are subsequences".
    maxDist : float, optional
        Distances above this threshold yield a similarity of 0.

    Returns
    -------
    sims : 2D array, shape (numNeighbors, numSubseqs)
        Row i holds `1 - dist` between neighbor i and each subsequence
        wherever `dist <= maxDist`, and 0 elsewhere. Here `dist` is the
        squared Euclidean distance between the mean-normalized subsequences
        divided by `length` times the variance of the neighbor. Rows
        belonging to (nearly) flat neighbors are left as all zeros.
    """
    seq = seq.flatten()
    X = ar.sliding_window_1D(seq, length)
    numSubseqs = X.shape[0]

    if numNeighbors < 1 or numNeighbors > numSubseqs:
        numNeighbors = numSubseqs

    # select random subseqs, weighted by their scores
    probs = windowScoresRandWalk(seq, length)
    total = np.sum(probs)
    if total > 0 and np.isfinite(total):
        probs = probs / total
    else:
        # Dividing by a zero (or NaN) total would turn every probability
        # into NaN, so fall back to sampling uniformly; max() avoids a
        # ZeroDivisionError when there are no subsequences at all.
        probs = np.ones(numSubseqs) / max(numSubseqs, 1)
    allIdxs = np.arange(numSubseqs)
    startIdxs = _randChoice(allIdxs, numNeighbors, replace=False, p=probs)
    neighbors = X[startIdxs]

    # mean normalize all subseqs
    X = X - np.mean(X, axis=1, keepdims=True)
    neighbors = neighbors - np.mean(neighbors, axis=1, keepdims=True)

    # compute similarity to each shape
    sims = np.zeros((numNeighbors, numSubseqs))  # extra rows for uniform output
    for i, neighbor in enumerate(neighbors):
        variance = np.var(neighbor)
        if variance < ar.DEFAULT_NONZERO_THRESH:  # ignore flat neighbors
            continue

        # compute squared dists; would be within [0, 4] if znormed
        diffs = X - neighbor
        dists = np.sum(diffs * diffs, axis=1) / (length * variance)

        sims[i] = (1. - dists) * (dists <= maxDist)  # zero out dists > maxDist

    return sims
def windowScoresRandWalk(seq, length, std=-1, numRandWalks=100):
    """
    Computes the Structure Score for each subsequence of length `length`
    within `seq`.

    Each subsequence is mean-normalized and compared against a set of random
    walks; its raw score is the smallest mean squared difference to any of
    those walks.

    Parameters
    ----------
    seq : 1D array
        1D time series (or a single dimension of a multidimensional
        time series)
    length : int > 0
        The length of subsequences. Each subsequence of this length is
        assigned a score.
    std : float, optional
        The standard deviation to use when creating random walks. Any value
        <= 0 (the default) selects the standard deviation of the first
        discrete derivative of `seq`.
    numRandWalks : int > 0, optional
        The number of random walk sequences to use to compute the score.
        Scores do not appear to become more meaningful for numbers > 100.

    Returns
    -------
    scores : 1D array
        The score for each subsequence of `seq`. This array is scaled by its
        maximum value, so that the maximum value is always 1. Exceptions:
        if every raw score is 0 the array is returned as all zeros (instead
        of NaNs from a 0 / 0 division); if `length` < 4 the scores are all
        zeros (with `len(seq)` entries when `length` <= 0); and if `seq` is
        shorter than `length` the array is empty.
    """
    numSubseqs = len(seq) - length + 1
    if length < 4:  # length < 4 is meaninglessly short
        # if m <= 0, n - m + 1 is > n, which makes us return too long an array
        numSubseqs = numSubseqs if length > 0 else len(seq)
        # clamp at 0: np.zeros() rejects the negative count we would get
        # when seq is shorter than length
        return np.zeros(max(numSubseqs, 0))
    if numSubseqs < 1:
        # seq is shorter than length, so there is nothing to score
        return np.zeros(0)

    std = std if std > 0 else np.std(seq[1:] - seq[:-1])  # std of discrete deriv
    # NOTE(review): walks is used below as a 2D array with one walk of
    # `length` samples per row -- confirm against _createRandWalks
    walks = _createRandWalks(numRandWalks, length, std)

    windowScores = np.zeros(numSubseqs)
    subseqs = ar.sliding_window_1D(seq, length)
    for i, subseq in enumerate(subseqs):
        diffs = walks - (subseq - np.mean(subseq))
        dists = np.sum(diffs * diffs, axis=1) / length
        windowScores[i] = np.min(dists)  # distance to the closest walk

    maxScore = np.max(windowScores)
    if maxScore == 0:
        # every score is 0 (e.g. a constant seq); dividing would give NaNs
        return windowScores
    return windowScores / maxScore  # normalize to max score of 1
def _neighborSims1D(seq, length, numNeighbors=100, maxDist=.25):
    """
    Computes the similarity of randomly sampled "neighbor" subsequences of
    `seq` to every subsequence of `seq`.

    Neighbors are sampled without replacement, weighted by the scores from
    `windowScoresRandWalk`. If those scores cannot be normalized into
    probabilities (they sum to zero, as happens when `length` < 4, or the
    sum is not finite), neighbors are sampled uniformly instead.

    Parameters
    ----------
    seq : array
        The time series; it is flattened to 1D before use.
    length : int
        The length of the subsequences.
    numNeighbors : int, optional
        How many subsequences to sample as neighbors. Values < 1 or greater
        than the number of subsequences mean "use as many neighbors as
        there are subsequences".
    maxDist : float, optional
        Distances above this threshold yield a similarity of 0.

    Returns
    -------
    sims : 2D array, shape (numNeighbors, numSubseqs)
        Row i holds `1 - dist` between neighbor i and each subsequence
        wherever `dist <= maxDist`, and 0 elsewhere. Here `dist` is the
        squared Euclidean distance between the mean-normalized subsequences
        divided by `length` times the variance of the neighbor. Rows
        belonging to (nearly) flat neighbors are left as all zeros.
    """
    seq = seq.flatten()
    X = ar.sliding_window_1D(seq, length)
    numSubseqs = X.shape[0]

    if numNeighbors < 1 or numNeighbors > numSubseqs:
        numNeighbors = numSubseqs

    # select random subseqs, weighted by their scores
    probs = windowScoresRandWalk(seq, length)
    total = np.sum(probs)
    if total > 0 and np.isfinite(total):
        probs = probs / total
    else:
        # Dividing by a zero (or NaN) total would turn every probability
        # into NaN, so fall back to sampling uniformly; max() avoids a
        # ZeroDivisionError when there are no subsequences at all.
        probs = np.ones(numSubseqs) / max(numSubseqs, 1)
    allIdxs = np.arange(numSubseqs)
    startIdxs = _randChoice(allIdxs, numNeighbors, replace=False, p=probs)
    neighbors = X[startIdxs]

    # mean normalize all subseqs
    X = X - np.mean(X, axis=1, keepdims=True)
    neighbors = neighbors - np.mean(neighbors, axis=1, keepdims=True)

    # compute similarity to each shape
    sims = np.zeros((numNeighbors, numSubseqs))  # extra rows for uniform output
    for i, neighbor in enumerate(neighbors):
        variance = np.var(neighbor)
        if variance < ar.DEFAULT_NONZERO_THRESH:  # ignore flat neighbors
            continue

        # compute squared dists; would be within [0, 4] if znormed
        diffs = X - neighbor
        dists = np.sum(diffs * diffs, axis=1) / (length * variance)

        sims[i] = (1. - dists) * (dists <= maxDist)  # zero out dists > maxDist

    return sims