def subseqDists(x, y):
    """Find the L2^2 distances between y and every subseq of x.

    ``y`` is flattened and z-normalized (mean subtracted, divided by its
    standard deviation). ``x`` is flattened and cut into windows of
    ``len(y)`` samples by ``window.sliding_window``; the windows are
    z-normalized row-wise by ``zNormalizeRows`` and compared against the
    query by ``distsToRows``, whose result is returned unchanged.

    NOTE(review): the distance metric itself lives in ``distsToRows``, which
    is not visible here -- "L2^2" is taken from the original docstring.
    """
    query = y.flatten()
    query = (query - np.mean(query)) / np.std(query)

    # Nd input seqs are flattened; the window step equals the number of
    # dimensions x had before flattening.
    # TODO allow stepping in more than one direction
    step = len(x.shape)
    flatSeq = x.flatten()

    windows = window.sliding_window(flatSeq, len(query), step)
    return distsToRows(zNormalizeRows(windows), query)
def uniqueSubseqsInSignals(signal, length, maxDist, norm='each', tree=None):
    """Greedily group the subsequences of ``signal`` by nearest-neighbor distance.

    The subsequences come from ``window.flattened_subseqs_of_length`` and are
    z-normalized row-wise. Rows are visited in order; each one is compared
    with its closest neighbor among the rows already stored in ``tree``:

    * if that neighbor is closer than ``maxDist``, the row's start index is
      appended to the neighbor's occurrence list;
    * otherwise the row becomes a new entry in both the dict and the tree.

    Rows whose sum of squares is below .001 are skipped. Row 0 always seeds
    the tree without getting a dict entry of its own, so key 0 only appears
    once a later row matches it (and its list then holds just those matches).

    NOTE: this file defines ``uniqueSubseqsInSignals`` a second time further
    down; Python binds the name to the later definition.

    Args:
        signal: input forwarded to ``window.flattened_subseqs_of_length``.
        length: subsequence length, forwarded as well.
        maxDist: strict upper bound on the distance reported by
            ``tree.search_knn`` for two subsequences to count as the same.
        norm: forwarded to ``window.flattened_subseqs_of_length``.
        tree: optional existing kd-tree to search and extend; a new one is
            created with ``kd.create`` when None.

    Returns:
        ``(signalOccurIdxs, Xnorm)``: a dict mapping a representative start
        index to the list of start indices grouped under it, and the
        normalized subsequence matrix (returned for convenience).

    Raises:
        AssertionError: if the knn search yields no neighbor other than the
            query itself.
    """
    X, _, _ = window.flattened_subseqs_of_length(signal, length, norm=norm)
    Xnorm = zNormalizeRows(X, removeZeros=False)

    signalOccurIdxs = {}
    if len(Xnorm) == 0:
        # nothing to index; avoids an IndexError on Xnorm[0] below
        return signalOccurIdxs, Xnorm

    # init kd tree--we can't give it all the data up front because we only
    # want to search through seqs that have been added to the dictionary
    if tree is None:
        tree = kd.create(dimensions=Xnorm.shape[1])

    tree.add(Xnorm[0], 0)
    for startIdx, subseq in enumerate(Xnorm[1:], 1):
        if np.sum(subseq * subseq) < .001:  # ignore zero seqs
            continue

        neighbors = tree.search_knn(subseq, 2)

        # Keep the *closest* neighbor that isn't the query itself. Simply
        # keeping the last hit would select the farther of the two results
        # whenever the query is not in the tree, which hides real matches.
        neighborIdx = -1
        neighborDist = np.inf
        for node, dist in neighbors:
            idx = node.metadata
            if idx == startIdx:
                continue
            if neighborIdx < 0 or dist < neighborDist:
                neighborIdx = idx
                neighborDist = dist

        if neighborIdx < 0:
            # explicit raise so the check survives ``python -O``
            raise AssertionError(
                "ERROR: knn returned <2 neighbors...\n"
                "Neighbors returned: %r" % (neighbors,))

        if neighborDist < maxDist:
            # store that the subseq happened at this idx too; setdefault
            # keeps the occurrence even when the neighbor has no dict entry
            # yet (row 0, or entries of a caller-supplied tree)
            signalOccurIdxs.setdefault(neighborIdx, []).append(startIdx)
        else:
            signalOccurIdxs[startIdx] = [startIdx]
            tree.add(subseq, startIdx)

        # rebalance if startIdx is a power of 2, so we do so log(N) times
        # (exact integer test instead of a float log2 round trip)
        if startIdx & (startIdx - 1) == 0:
            # NOTE(review): if kd's rebalance() returns a new root instead of
            # rebalancing in place, the result must be assigned -- confirm.
            tree.rebalance()

    return signalOccurIdxs, Xnorm
def allZNormalizedSubseqs(seqs, length):
    """Return the row-wise z-normalized subsequence matrix for ``seqs``.

    Subsequences are extracted by ``window.flattened_subseqs_of_length`` with
    ``norm='each'`` (only the first element of its 3-tuple result is used) and
    then passed through ``zNormalizeRows`` with ``removeZeros=False``.
    """
    extracted = window.flattened_subseqs_of_length(seqs, length, norm='each')
    subseqMat = extracted[0]
    return zNormalizeRows(subseqMat, removeZeros=False)
def uniqueSubseqsInSignals(signal, length, maxDist, norm='each', tree=None):
    """Greedily group the subsequences of ``signal`` by nearest-neighbor distance.

    The subsequences come from ``window.flattened_subseqs_of_length`` and are
    z-normalized row-wise. Rows are visited in order; each one is compared
    with its closest neighbor among the rows already stored in ``tree``:

    * if that neighbor is closer than ``maxDist``, the row's start index is
      appended to the neighbor's occurrence list;
    * otherwise the row becomes a new entry in both the dict and the tree.

    Rows whose sum of squares is below .001 are skipped. Row 0 always seeds
    the tree without getting a dict entry of its own, so key 0 only appears
    once a later row matches it (and its list then holds just those matches).

    NOTE: an earlier definition with the same name exists above in this
    file; this later one is the definition that ends up bound to the name.

    Args:
        signal: input forwarded to ``window.flattened_subseqs_of_length``.
        length: subsequence length, forwarded as well.
        maxDist: strict upper bound on the distance reported by
            ``tree.search_knn`` for two subsequences to count as the same.
        norm: forwarded to ``window.flattened_subseqs_of_length``.
        tree: optional existing kd-tree to search and extend; a new one is
            created with ``kd.create`` when None.

    Returns:
        ``(signalOccurIdxs, Xnorm)``: a dict mapping a representative start
        index to the list of start indices grouped under it, and the
        normalized subsequence matrix (returned for convenience).

    Raises:
        AssertionError: if the knn search yields no neighbor other than the
            query itself.
    """
    X, _, _ = window.flattened_subseqs_of_length(signal, length, norm=norm)
    Xnorm = zNormalizeRows(X, removeZeros=False)

    signalOccurIdxs = {}
    if len(Xnorm) == 0:
        # nothing to index; avoids an IndexError on Xnorm[0] below
        return signalOccurIdxs, Xnorm

    # init kd tree--we can't give it all the data up front because we only
    # want to search through seqs that have been added to the dictionary
    if tree is None:
        tree = kd.create(dimensions=Xnorm.shape[1])

    tree.add(Xnorm[0], 0)
    for startIdx, subseq in enumerate(Xnorm[1:], 1):
        if np.sum(subseq * subseq) < .001:  # ignore zero seqs
            continue

        neighbors = tree.search_knn(subseq, 2)

        # Keep the *closest* neighbor that isn't the query itself. Simply
        # keeping the last hit would select the farther of the two results
        # whenever the query is not in the tree, which hides real matches.
        neighborIdx = -1
        neighborDist = np.inf
        for node, dist in neighbors:
            idx = node.metadata
            if idx == startIdx:
                continue
            if neighborIdx < 0 or dist < neighborDist:
                neighborIdx = idx
                neighborDist = dist

        if neighborIdx < 0:
            # explicit raise so the check survives ``python -O``
            raise AssertionError(
                "ERROR: knn returned <2 neighbors...\n"
                "Neighbors returned: %r" % (neighbors,))

        if neighborDist < maxDist:
            # store that the subseq happened at this idx too; setdefault
            # keeps the occurrence even when the neighbor has no dict entry
            # yet (row 0, or entries of a caller-supplied tree)
            signalOccurIdxs.setdefault(neighborIdx, []).append(startIdx)
        else:
            signalOccurIdxs[startIdx] = [startIdx]
            tree.add(subseq, startIdx)

        # rebalance if startIdx is a power of 2, so we do so log(N) times
        # (exact integer test instead of a float log2 round trip)
        if startIdx & (startIdx - 1) == 0:
            # NOTE(review): if kd's rebalance() returns a new root instead of
            # rebalancing in place, the result must be assigned -- confirm.
            tree.rebalance()

    return signalOccurIdxs, Xnorm