Ejemplo n.º 1
0
def subseqDists(x, y):
    """Compute the L2^2 distance between `y` and every subsequence of `x`.

    `y` is flattened and z-normalized (mean subtracted, then divided by its
    standard deviation). `x` is flattened and cut into windows of `len(y)`
    samples by `window.sliding_window`; each window is normalized with
    `zNormalizeRows` before being compared to `y` by `distsToRows`.

    Args:
        x: array-like with `.shape` and `.flatten()`; the sequence to search.
        y: array-like with `.flatten()`; the query sequence.

    Returns:
        Whatever `distsToRows` returns for the normalized windows and the
        normalized query (presumably one distance per window).
    """
    query = y.flatten()
    query = (query - np.mean(query)) / np.std(query)

    # The stride handed to sliding_window is the number of dimensions `x`
    # had before flattening.
    # TODO allow stepping in more than one direction
    step = len(x.shape)
    flatX = x.flatten()

    windows = zNormalizeRows(window.sliding_window(flatX, len(query), step))
    return distsToRows(windows, query)
Ejemplo n.º 2
0
def subseqDists(x, y):
	"""Compute the L2^2 distance between `y` and every subsequence of `x`.

	`y` is flattened and z-normalized (mean subtracted, then divided by its
	standard deviation). `x` is flattened and cut into windows of `len(y)`
	samples by `window.sliding_window`; each window is normalized with
	`zNormalizeRows` before being compared to `y` by `distsToRows`.

	Args:
		x: array-like with `.shape` and `.flatten()`; the sequence to search.
		y: array-like with `.flatten()`; the query sequence.

	Returns:
		Whatever `distsToRows` returns for the normalized windows and the
		normalized query (presumably one distance per window).
	"""
	query = y.flatten()
	query = (query - np.mean(query)) / np.std(query)

	# The stride handed to sliding_window is the number of dimensions `x`
	# had before flattening.
	# TODO allow stepping in more than one direction
	step = len(x.shape)
	flatX = x.flatten()

	windows = zNormalizeRows(window.sliding_window(flatX, len(query), step))
	return distsToRows(windows, query)
Ejemplo n.º 3
0
def uniqueSubseqsInSignals(signal, length, maxDist, norm='each', tree=None):
    """Greedily group the subsequences of `signal` into unique representatives.

    Subsequences are visited in order of start index. Each one is compared
    with its nearest neighbor among the subsequences already stored in
    `tree`. If that neighbor is closer than `maxDist`, the subsequence is
    recorded as another occurrence of the neighbor; otherwise it becomes a
    new unique subsequence and is added to the tree. Rows whose sum of
    squares is below .001 are skipped as zero sequences.

    Args:
        signal: input forwarded to `window.flattened_subseqs_of_length`.
        length: subsequence length, forwarded to the same function.
        maxDist: threshold compared against the distances reported by
            `tree.search_knn`; a neighbor strictly closer than this counts
            as a match.
        norm: normalization mode forwarded to
            `window.flattened_subseqs_of_length`.
        tree: optional existing tree to search and extend. When None, a new
            one is created with `kd.create`. Only subsequences added by this
            call get an entry in the returned dict.

    Returns:
        A tuple `(signalOccurIdxs, Xnorm)`. `signalOccurIdxs` maps the start
        index of each unique subsequence to the list of start indices
        assigned to it (beginning with itself). `Xnorm` is the output of
        `zNormalizeRows(X, removeZeros=False)`, returned for convenience.

    Raises:
        AssertionError: if the tree yields no neighbor for a query.
    """
    X, _, _ = window.flattened_subseqs_of_length(signal, length, norm=norm)
    Xnorm = zNormalizeRows(X, removeZeros=False)

    # Nothing to group; avoids an IndexError on Xnorm[0] below.
    if len(Xnorm) == 0:
        return {}, Xnorm

    # init kd tree--we can't give it any data up front because we only want
    # to search through seqs that have been added to the dictionary
    if tree is None:
        tree = kd.create(dimensions=Xnorm.shape[1])

    # The first subsequence seeds the tree. Register it in the dict as well,
    # otherwise every later match against it would be silently dropped.
    signalOccurIdxs = {0: [0]}
    tree.add(Xnorm[0], 0)

    for startIdx, subseq in enumerate(Xnorm[1:], 1):
        if np.sum(subseq * subseq) < .001:  # ignore zero seqs
            continue

        # Pick the closest stored subsequence that is not the query itself.
        # The query is not in the tree yet, so simply keeping the last of the
        # k=2 results would select the second-nearest neighbor.
        neighbors = tree.search_knn(subseq, 2)
        neighborIdx = -1
        neighborDist = np.inf
        for node, dist in neighbors:
            idx = node.metadata
            if idx == startIdx:
                continue
            if neighborIdx < 0 or dist < neighborDist:
                neighborIdx = idx
                neighborDist = dist

        if neighborIdx < 0:
            print("ERROR: knn returned <2 neighbors...")
            print("Neighbors returned: %s" % (neighbors,))
            # Explicit raise: a bare `assert` disappears under `python -O`.
            raise AssertionError(
                "no neighbor found for subsequence %d" % (startIdx,))

        if neighborDist < maxDist:
            # store that the subseq happened at this idx too
            occurrences = signalOccurIdxs.get(neighborIdx)
            if occurrences is not None:
                occurrences.append(startIdx)
            # A neighbor without an entry was not added by this call (e.g.
            # it came from a caller-supplied tree); as before, such matches
            # are not recorded here.
        else:
            signalOccurIdxs[startIdx] = [startIdx]
            tree.add(subseq, startIdx)

        # rebalance if startIdx is a power of 2, so we do so log(N) times
        if (startIdx & (startIdx - 1)) == 0:
            tree.rebalance()

    return signalOccurIdxs, Xnorm
Ejemplo n.º 4
0
def allZNormalizedSubseqs(seqs, length):
    """Return every subsequence of `seqs` of the given length, row-normalized.

    The subsequences come from `window.flattened_subseqs_of_length` called
    with `norm='each'`; only the first of its three return values is used.
    The rows are then passed through `zNormalizeRows` with
    `removeZeros=False`.
    """
    rows, _, _ = window.flattened_subseqs_of_length(
        seqs, length, norm='each')
    normalizedRows = zNormalizeRows(rows, removeZeros=False)
    return normalizedRows
Ejemplo n.º 5
0
def uniqueSubseqsInSignals(signal, length, maxDist, norm='each', tree=None):
	"""Greedily group the subsequences of `signal` into unique representatives.

	Subsequences are visited in order of start index. Each one is compared
	with its nearest neighbor among the subsequences already stored in
	`tree`. If that neighbor is closer than `maxDist`, the subsequence is
	recorded as another occurrence of the neighbor; otherwise it becomes a
	new unique subsequence and is added to the tree. Rows whose sum of
	squares is below .001 are skipped as zero sequences.

	Args:
		signal: input forwarded to `window.flattened_subseqs_of_length`.
		length: subsequence length, forwarded to the same function.
		maxDist: threshold compared against the distances reported by
			`tree.search_knn`; a neighbor strictly closer than this counts
			as a match.
		norm: normalization mode forwarded to
			`window.flattened_subseqs_of_length`.
		tree: optional existing tree to search and extend. When None, a new
			one is created with `kd.create`. Only subsequences added by this
			call get an entry in the returned dict.

	Returns:
		A tuple `(signalOccurIdxs, Xnorm)`. `signalOccurIdxs` maps the start
		index of each unique subsequence to the list of start indices
		assigned to it (beginning with itself). `Xnorm` is the output of
		`zNormalizeRows(X, removeZeros=False)`, returned for convenience.

	Raises:
		AssertionError: if the tree yields no neighbor for a query.
	"""
	X, _, _ = window.flattened_subseqs_of_length(signal, length, norm=norm)
	Xnorm = zNormalizeRows(X, removeZeros=False)

	# Nothing to group; avoids an IndexError on Xnorm[0] below.
	if len(Xnorm) == 0:
		return {}, Xnorm

	# init kd tree--we can't give it any data up front because we only want
	# to search through seqs that have been added to the dictionary
	if tree is None:
		tree = kd.create(dimensions=Xnorm.shape[1])

	# The first subsequence seeds the tree. Register it in the dict as well,
	# otherwise every later match against it would be silently dropped.
	signalOccurIdxs = {0: [0]}
	tree.add(Xnorm[0], 0)

	for startIdx, subseq in enumerate(Xnorm[1:], 1):
		if np.sum(subseq * subseq) < .001:  # ignore zero seqs
			continue

		# Pick the closest stored subsequence that is not the query itself.
		# The query is not in the tree yet, so simply keeping the last of the
		# k=2 results would select the second-nearest neighbor.
		neighbors = tree.search_knn(subseq, 2)
		neighborIdx = -1
		neighborDist = np.inf
		for node, dist in neighbors:
			idx = node.metadata
			if idx == startIdx:
				continue
			if neighborIdx < 0 or dist < neighborDist:
				neighborIdx = idx
				neighborDist = dist

		if neighborIdx < 0:
			print("ERROR: knn returned <2 neighbors...")
			print("Neighbors returned: %s" % (neighbors,))
			# Explicit raise: a bare `assert` disappears under `python -O`.
			raise AssertionError(
				"no neighbor found for subsequence %d" % (startIdx,))

		if neighborDist < maxDist:
			# store that the subseq happened at this idx too
			occurrences = signalOccurIdxs.get(neighborIdx)
			if occurrences is not None:
				occurrences.append(startIdx)
			# A neighbor without an entry was not added by this call (e.g.
			# it came from a caller-supplied tree); as before, such matches
			# are not recorded here.
		else:
			signalOccurIdxs[startIdx] = [startIdx]
			tree.add(subseq, startIdx)

		# rebalance if startIdx is a power of 2, so we do so log(N) times
		if (startIdx & (startIdx - 1)) == 0:
			tree.rebalance()

	return signalOccurIdxs, Xnorm
Ejemplo n.º 6
0
def allZNormalizedSubseqs(seqs, length):
	"""Return every subsequence of `seqs` of the given length, row-normalized.

	The subsequences come from `window.flattened_subseqs_of_length` called
	with `norm='each'`; only the first of its three return values is used.
	The rows are then passed through `zNormalizeRows` with
	`removeZeros=False`.
	"""
	rows, _, _ = window.flattened_subseqs_of_length(
		seqs, length, norm='each')
	normalizedRows = zNormalizeRows(rows, removeZeros=False)
	return normalizedRows