Python embedExamples Examples

Programming Language: Python

Namespace/Package Name: ff5

Method/Function: embedExamples

Examples at hotexamples.com: 6

Python embedExamples - 6 examples found. These are the top rated real world Python examples of ff5.embedExamples extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: ff8.py Project: yiweichung/extract

def notSoRandWalkSeq(n=500, exampleLengths=None, noiseStd=.5):
	"""Generate a "not so random" walk and embed example patterns in it.

	Args:
		n: length argument forwarded to ``synth.notSoRandomWalk``.
		exampleLengths: lengths forwarded to ``embedExamples``. When omitted,
			a fresh ``[55, 60, 65]`` list is created for this call.
		noiseStd: forwarded to ``synth.notSoRandomWalk`` as ``std``.

	Returns:
		The result of ``embedExamples(seq, exampleLengths)``.
	"""
	if exampleLengths is None:
		# Built per call: a list literal in the signature would be one shared
		# object that any callee mutation would leak into later calls.
		exampleLengths = [55, 60, 65]
	seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
	return embedExamples(seq, exampleLengths)

Example #2

Show file

File: ff8.py Project: yiweichung/extract

def randWalkSeq(n=500, exampleLengths=None, noiseStd=.5):
	"""Generate a random walk and embed example patterns in it.

	Args:
		n: length argument forwarded to ``synth.randwalk``.
		exampleLengths: lengths forwarded to ``embedExamples``. When omitted,
			a fresh ``[55, 60, 65]`` list is created for this call.
		noiseStd: forwarded to ``synth.randwalk`` as ``std``.

	Returns:
		The result of ``embedExamples(seq, exampleLengths)``.
	"""
	if exampleLengths is None:
		# Built per call: a list literal in the signature would be one shared
		# object that any callee mutation would leak into later calls.
		exampleLengths = [55, 60, 65]
	seq = synth.randwalk(n, std=noiseStd)
	return embedExamples(seq, exampleLengths)

Example #3

Show file

File: ff6.py Project: yiweichung/extract

def main():
    """Run the similarity-matrix pattern-search demo on one MSRC recording.

    Steps, in order:
      1. Build a synthetic sequence (it is overwritten by the MSRC data
         right after; the calls are kept so the statement sequence is
         unchanged).
      2. Load one MSRC recording, downsample it and pad both ends with the
         first/last sample.
      3. Compute a similarity matrix, zero entries below ``minSim``, apply
         ``ff2.localMaxFilterSimMat`` and a hamming ``ff2.filterSimMat``.
      4. For each window location pick aligned locations with
         ``sub.optimalAlignment`` and keep the best-scoring set.
      5. Estimate the pattern extent inside the window with ``maxSubarray``
         and plot everything.

    Returns:
        None. Output is printed and shown via matplotlib.
    """
    # ================================ consts for everything
    # consts for generating data
    n = 500
    length = 32
    exampleLengths = [55, 60, 65]
    noiseStd = .5

    # consts for algorithm
    Lmin = max(20, length)  # only needed for optimalAlignK() spacing
    Lmax = 100  # loose upper bound on pattern length
    minSim = .5  # loose cutoff for what counts as similar

    answerIdxs = None

    # ------------------------ synthetic data
    # NOTE: `seq` is replaced by the MSRC recording below.
    seq = synth.notSoRandomWalk(n,
                                std=noiseStd,
                                trendFilterLength=80,
                                lpfLength=16)
    seq = embedExamples(seq, exampleLengths)

    # ------------------------ msrc
    from ..datasets import read_msrc as msrc
    idxs = [2]
    downsampleBy = 2
    recordings = msrc.getRecordings(idxs=idxs)
    r = list(recordings)[0]
    seq = r.data[:, 24:27]
    print("orig seq shape {}".format(seq.shape))
    seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
    print("downsampled seq shape {}".format(seq.shape))

    # Floor division: these values are used as array shapes below, so they
    # must stay integers regardless of the active division semantics.
    # NOTE(review): `length` is derived from the *old* Lmin (before it is
    # recomputed from len(seq)) -- order kept as in the original; confirm.
    length = max(8, Lmin // 2)
    Lmin = len(seq) // 20
    Lmax = len(seq) // 10
    answerIdxs = r.gestureIdxs / downsampleBy

    # pad with fixed val to allow all window positions
    # TODO pad simMat with zeros instead--this introduces fake subseqs
    prePadLen = Lmax - length
    postPadLen = length - 1
    first = np.tile(seq[0], (prePadLen, 1))
    last = np.tile(seq[-1], (postPadLen, 1))
    seq = np.vstack((first, seq, last))
    answerIdxs += prePadLen

    # ================================ simMat
    X = computeSimMat(seq, length)
    X[X < minSim] = 0.
    X = ff2.localMaxFilterSimMat(X)
    Xblur = ff2.filterSimMat(X,
                             length - 1,
                             'hamming',
                             scaleFilterMethod='max1')
    Xblur = np.minimum(Xblur, 1.)

    print("simMat dims: {}".format(X.shape))
    Xnonzeros = np.count_nonzero(X)
    print("simMat nonzeros, total, frac =  {} {} {}".format(
        Xnonzeros, X.size, Xnonzeros / float(X.size)))

    # ================================ plotting
    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    if answerIdxs is not None:
        for idx in answerIdxs:
            viz.plotVertLine(idx, ax=axSeq)
    Xpad = synth.appendZeros(X, length - 1)
    axSim.imshow(Xpad, interpolation='nearest', aspect='auto')

    axSeq.set_title("Time Series")
    axSim.set_title("Similarities Matrix")

    # ================================ science

    # ------------------------ derived stats
    # NOTE(review): with Python 2 integer division this floors instead of
    # rounding despite the "+ .5" -- expression left untouched; confirm intent.
    kMax = int(X.shape[1] / Lmin + .5)
    windowLen = Lmax - length + 1

    p0 = np.mean(X)  # fraction of entries that are 1 (roughly)

    colSims = np.dot(X.T, Xblur)
    # Convolving with an identity matrix sums colSims along diagonals of
    # length windowLen.
    diagFilt = np.eye(windowLen)
    windowSims = sig.convolve2d(colSims, diagFilt, mode='valid')

    windowVects = vectorizeWindowLocs(X, windowLen)
    windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

    plt.figure()
    plt.imshow(windowSims, interpolation='nearest', aspect='auto')

    # ------------------------ find stuff
    #
    # Version where we look for similarities to orig seq and use nearest
    # enemy dist as M0, and use mean values instead of intersection
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print("computing stuff for row {}".format(i))
        # early abandon if this location has so little stuff that no
        # intersection with it can possibly beat the best score
        # (highest score is kMax identical locs)
        if windowSims[i, i] * kMax <= bsfScore:
            continue

        # best combination of idxs such that none are within Lmin of each
        # other; goes past end of ts, but better
        idxs = sub.optimalAlignment(row, Lmin)

        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]

        # iteratively intersect with another near neighbor, compute the
        # associated score, and check if it's better (or if we can early
        # abandon)
        intersection = windowVects[i]
        numIdxs = len(sortedIdxs)
        nextSz = np.sum(intersection)
        # builtin float: the np.float alias no longer exists in current NumPy
        nextFilt = np.array(intersection, dtype=float)
        nextFiltSum = np.array(nextFilt, dtype=float)
        for k in range(1, numIdxs + 1):
            filt = np.copy(nextFilt)
            sz = nextSz
            if k < numIdxs:
                nextIdx = sortedIdxs[k]
                nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
                nextFiltSum += nextIntersection
                # avg value of each feature in intersections
                nextFilt = nextFiltSum / (k + 1)
                bigEnoughIntersection = nextIntersection[
                    nextIntersection > minSim]
                nextSz = np.sum(bigEnoughIntersection)
            else:
                nextSz = sz * p0

            score = (sz - nextSz) * k
            if k > 1 and score > bsfScore:
                print("window {0}, k={1}, score={2} is the new best!".format(
                    i, k, score))
                print("sortedIdxs = {}".format(str(sortedIdxs)))
                print("sortedIdxScores = {}".format(
                    str(windowSims[i, sortedIdxs])))
                print("------------------------")
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(filt)
            # early abandon if this can't possibly beat the best score, which
            # is the case exactly when the intersection is so small that
            # perfect matches at all future locations still wouldn't be good
            # enough
            elif sz * numIdxs <= bsfScore:
                # TODO can we actually early abandon here? next window loc
                # could increase filt, and thus score for a given loc isn't
                # necessarily non-increasing...
                # 	-can't abandon using this test, but pretty sure there's
                # 	a lower bound to be had here somewhere
                break

    # Nothing ever beat the initial score: the recovery step below would fail
    # on None, so show what has been plotted so far and stop.
    if bsfLocs is None:
        print("no pattern found")
        plt.show()
        return

    # ------------------------ recover original ts
    bsfIntersection *= bsfIntersection >= minSim
    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
    sums = np.sum(bsfIntersectionWindow, axis=0)

    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    expectedOnesPerCol = p0 * X.shape[1] * 2
    sums -= expectedOnesPerCol

    plt.plot(sums)

    start, end, _ = maxSubarray(sums)
    patStart, patEnd = start, end + 1 + length

    # ================================ show output
    print("bestScore = {}".format(bsfScore))
    print("bestLocations = {}".format(str(bsfLocs)))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowLen)

    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

    plt.tight_layout()
    plt.show()

Example #4

Show file

File: ff7.py Project: dblalock/flock

def main():
	"""Run the feature-matrix pattern-search demo.

	Steps, in order:
	  1. Build a synthetic random-walk sequence with embedded examples.
	  2. If ``USE_MSRC`` is set, replace it with one downsampled MSRC
	     recording padded at both ends with its first/last sample.
	  3. Build a feature matrix from ``rep.multiNormalizeAndSparseQuantize``
	     stacked on a thresholded ``rep.multiSparseLineProject``, filter it
	     with ``localMaxFilterSimMat`` and weight each row by
	     ``-log2(row mean)``.
	  4. For each window location pick aligned locations with
	     ``sub.optimalAlignment`` and keep the best-scoring set.
	  5. Estimate the pattern extent inside the window with ``maxSubarray``
	     and plot everything.

	Returns:
		None. Output is printed and shown via matplotlib.
	"""
	# ================================ consts for everything
	# consts for generating data
	n = 500
	exampleLengths = [55, 60, 65]
	noiseStd = .5

	# consts for algorithm
	Lmin = 20				# only needed for optimalAlignK() spacing
	Lmax = 100				# loose upper bound on pattern length
	minSim = 0.
	length = Lmin // 2

	answerIdxs = None

	USE_MSRC = True

	# ================================ data

	# ------------------------ synthetic data
	seq = synth.randwalk(n, std=noiseStd)
	seq = embedExamples(seq, exampleLengths)

	# ------------------------ msrc
	if USE_MSRC:
		from ..datasets import read_msrc as msrc
		# notes on other recordings that were tried:
		#   7:  length 1500, but instances of length like 20
		#   8:  gets owned on this one cuz patterns of length like 100
		#   9:  missing an annotation, it appears
		#   11: crap cuz bad, low-variance signals
		#   12: has garbagey sections like 10
		#   13: empty feature mat # TODO
		idxs = [10] # something crazy about feature rep here # TODO fix
		downsampleBy = 2
		recordings = msrc.getRecordings(idxs=idxs)
		r = list(recordings)[0]
		seq = r.data[:, 24:27]
		print("orig seq shape {}".format(seq.shape))
		seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
		print("downsampled seq shape {}".format(seq.shape))
		Lmin = len(seq) // 20
		Lmax = len(seq) // 10
		length = Lmin // 2
		answerIdxs = r.gestureIdxs / downsampleBy

		# pad with fixed val to allow all window positions
		# TODO pad simMat with zeros instead--this introduces fake subseqs
		prePadLen = Lmax - length
		postPadLen = Lmax - length
		first = np.tile(seq[0], (prePadLen, 1))
		last = np.tile(seq[-1], (postPadLen, 1))
		seq = np.vstack((first, seq, last))
		answerIdxs += prePadLen

	# ================================ feature construction

	# power-of-two lengths between Lmin and Lmax (both rounded down in log2)
	logMaxLength = int(np.floor(np.log2(Lmax)))
	logMinLength = int(np.floor(np.log2(Lmin)))
	lengths = np.arange(logMinLength, logMaxLength + 1)
	lengths = 2 ** lengths

	cardinality = 8
	breakpoints = rep.saxBreakpoints(cardinality)

	X = rep.multiNormalizeAndSparseQuantize(seq, lengths, breakpoints)

	lengths2 = lengths # TODO uncomment after debug
	breakpoints2 = rep.defaultSparseLineBreakpoints(seq)
	X2 = rep.multiSparseLineProject(seq, lengths2, breakpoints2)
	X2 = X2 > 0. # ignore correlations

	X = np.vstack((X, X2))

	X = localMaxFilterSimMat(X)
	featureMeans = np.mean(X, axis=1).reshape((-1, 1))
	# variable encoding costs for rows. Rows whose mean is not positive get
	# weight 0: -log2(0) is inf, and 0 * inf would fill the row with NaN and
	# poison every statistic computed from X afterwards.
	rowWeights = np.zeros_like(featureMeans)
	hasMass = featureMeans > 0
	rowWeights[hasMass] = -np.log2(featureMeans[hasMass])
	X *= rowWeights
	Xblur = filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')

	print("featureMat dims: {}".format(X.shape))
	Xnonzeros = np.count_nonzero(X)
	print("featureMat nonzeros, total, frac =  {} {} {}".format(
		Xnonzeros, X.size, Xnonzeros / float(X.size)))

	# ================================ plotting

	plt.figure()
	axSeq = plt.subplot2grid((4,1), (0,0))
	axSim = plt.subplot2grid((4,1), (1,0), rowspan=3)
	for ax in (axSeq, axSim):
		ax.autoscale(tight=True)
	axSeq.plot(seq)
	padLen = len(seq) - X.shape[1]
	Xpad = synth.appendZeros(X, padLen)
	axSim.imshow(Xpad, interpolation='nearest', aspect='auto')

	axSeq.set_title("Time Series")
	axSim.set_title("Feature Matrix")

	# ================================ science

	# ------------------------ derived stats
	# NOTE(review): with Python 2 integer division this floors instead of
	# rounding despite the "+ .5" -- expression left untouched; confirm intent.
	kMax = int(X.shape[1] / Lmin + .5)
	windowLen = Lmax - length + 1

	p0 = np.mean(X) # fraction of entries that are 1 (roughly)
	minSim = p0
	expectedOnesPerWindow = p0 * X.shape[0] * windowLen
	noiseSz = p0 * expectedOnesPerWindow # num ones to begin with

	colSims = np.dot(X.T, Xblur)
	# Convolving with an identity matrix sums colSims along diagonals of
	# length windowLen.
	diagFilt = np.eye(windowLen)
	windowSims = sig.convolve2d(colSims, diagFilt, mode='valid')

	windowVects = vectorizeWindowLocs(X, windowLen)
	windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

	# ------------------------ find stuff

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection
	#
	bsfScore = 0
	bsfLocs = None
	bsfIntersection = None
	for i, row in enumerate(windowSims):
		if i % 20 == 0:
			print("computing stuff for row {}".format(i))
		# early abandon if this location has so little stuff that no
		# intersection with it can possibly beat the best score
		# (highest score is kMax identical locs)
		if windowSims[i,i] * kMax <= bsfScore:
			continue

		# best combination of idxs such that none are within Lmin of each
		# other; goes past end of ts, but better
		idxs = sub.optimalAlignment(row, Lmin)

		# order idxs by descending order of associated score
		sizes = windowSims[i, idxs]
		sortedSizesOrder = np.argsort(sizes)[::-1]
		sortedIdxs = idxs[sortedSizesOrder]

		# iteratively intersect with another near neighbor, compute the
		# associated score, and check if it's better (or if we can early abandon)
		intersection = windowVects[i]
		numIdxs = len(sortedIdxs)
		nextSz = np.sum(intersection)
		# builtin float: the np.float alias no longer exists in current NumPy
		nextFilt = np.array(intersection, dtype=float)
		nextFiltSum = np.array(nextFilt, dtype=float)
		for k in range(1, numIdxs + 1):
			filt = np.copy(nextFilt)
			sz = nextSz
			if k < numIdxs:
				nextIdx = sortedIdxs[k]
				nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
				nextFiltSum += nextIntersection
				nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
				bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
				nextSz = np.sum(bigEnoughIntersection)
			else:
				nextSz = sz * p0
			enemySz = max(nextSz, noiseSz)

			score = (sz - enemySz) * k
			if k > 1 and score > bsfScore:
				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
				print("sortedIdxs = {}".format(str(sortedIdxs)))
				print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
				print("------------------------")
				bsfScore = score
				bsfLocs = sortedIdxs[:k]
				bsfIntersection = np.copy(filt)
			# early abandon if this can't possibly beat the best score, which
			# is the case exactly when the intersection is so small that perfect
			# matches at all future locations still wouldn't be good enough
			elif sz * numIdxs <= bsfScore:
				# TODO can we actually early abandon here? next window loc
				# could increase filt, and thus score for a given loc isn't
				# necessarily non-increasing...
				# 	-can't abandon using this test, but pretty sure there's
				# 	a lower bound to be had here somewhere
				break
			elif noiseSz > nextSz:
				break

	# Nothing ever beat the initial score: the recovery step below would fail
	# on None, so show what has been plotted so far and stop.
	if bsfLocs is None:
		print("no pattern found")
		plt.show()
		return

	# ------------------------ recover original ts

	bsfIntersection *= bsfIntersection >= minSim
	bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
	sums = np.sum(bsfIntersectionWindow, axis=0)

	kBest = len(bsfLocs)
	p0 = np.power(p0, kBest)
	expectedOnesPerCol = p0 * X.shape[0]
	sums -= expectedOnesPerCol

	start, end, _ = maxSubarray(sums)
	patStart, patEnd = start, end + 1

	# ================================ show output

	print("bestScore = {}".format(bsfScore))
	print("bestLocations = {}".format(str(bsfLocs)))

	for idx in bsfLocs:
		viz.plotRect(axSim, idx, idx+windowLen)

	for idx in bsfLocs:
		viz.plotRect(axSeq, idx + patStart, idx + patEnd)

	if answerIdxs is not None:
		for idx in answerIdxs:
			viz.plotVertLine(idx, ax=axSeq)

	plt.figure()
	plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

	plt.tight_layout()
	plt.show()

Example #5

Show file

File: ff6.py Project: icdm-extract/extract

def main():
	"""Search one MSRC recording for a repeated pattern and plot the result.

	Pipeline, in order:
	  1. build a synthetic sequence with embedded examples (it is replaced
	     by the MSRC data in the next step)
	  2. load one MSRC recording, downsample it, and pad both ends with
	     copies of its first / last row
	  3. compute a similarity matrix, zero entries below ``minSim``, run it
	     through ``ff2.localMaxFilterSimMat`` and build a hamming-filtered
	     copy clipped to 1
	  4. for every window position, pick candidate matching windows via
	     ``sub.optimalAlignment`` and keep the best-scoring set found
	  5. estimate the pattern extent within the window with
	     ``maxSubarray`` and draw the results

	Takes no arguments and returns None. Output is printed text and
	matplotlib figures. If no candidate set beats a score of 0, a message
	is printed and only the figures built so far are shown.
	"""
	# ================================ consts for everything
	# consts for generating data
	n = 500
	length = 32
	exampleLengths = [55, 60, 65]
	noiseStd = .5

	# consts for algorithm
	Lmin = max(20, length)  # min spacing handed to sub.optimalAlignment()
	Lmax = 100  # loose upper bound on pattern length
	minSim = .5  # loose cutoff for what counts as similar

	answerIdxs = None

	# ------------------------ synthetic data

	# NOTE: this sequence is overwritten by the MSRC recording below
	seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
	seq = embedExamples(seq, exampleLengths)

	# ------------------------ msrc

	from ..datasets import read_msrc as msrc
	idxs = [2]
	downsampleBy = 2
	recordings = msrc.getRecordings(idxs=idxs)
	r = list(recordings)[0]
	seq = r.data[:, 24:27]
	print("orig seq shape {}".format(seq.shape))
	seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
	print("downsampled seq shape {}".format(seq.shape))

	# floor division: these values are used as repetition counts for
	# np.tile and as window sizes, so they have to stay integers
	length = max(8, Lmin // 2)
	Lmin = len(seq) // 20
	Lmax = len(seq) // 10
	minSim = .5
	answerIdxs = r.gestureIdxs / downsampleBy

	# pad with fixed values to allow all window positions
	# TODO pad simMat with zeros instead--this introduces fake subseqs
	prePadLen = Lmax - length
	postPadLen = length - 1
	first = np.tile(seq[0], (prePadLen, 1))
	last = np.tile(seq[-1], (postPadLen, 1))
	seq = np.vstack((first, seq, last))
	answerIdxs += prePadLen  # keep annotations aligned with the padded seq

	# ================================ simMat

	X = computeSimMat(seq, length)
	X[X < minSim] = 0.
	X = ff2.localMaxFilterSimMat(X)
	Xblur = ff2.filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')
	Xblur = np.minimum(Xblur, 1.)

	print("simMat dims: {}".format(X.shape))
	Xnonzeros = np.count_nonzero(X)
	print("simMat nonzeros, total, frac =  {} {} {}".format(
		Xnonzeros, X.size, Xnonzeros / float(X.size)))

	# ================================ plotting

	plt.figure()
	axSeq = plt.subplot2grid((4,1), (0,0))
	axSim = plt.subplot2grid((4,1), (1,0), rowspan=3)
	for ax in (axSeq, axSim):
		ax.autoscale(tight=True)
	axSeq.plot(seq)
	if answerIdxs is not None:
		for idx in answerIdxs:
			viz.plotVertLine(idx, ax=axSeq)
	Xpad = synth.appendZeros(X, length-1)
	axSim.imshow(Xpad, interpolation='nearest', aspect='auto')

	axSeq.set_title("Time Series")
	axSim.set_title("Similarities Matrix")

	# ================================ science

	# ------------------------ derived stats
	# NOTE(review): the +.5 only rounds under true division; with integer
	# operands on Python 2 the quotient is already floored -- confirm intent
	kMax = int(X.shape[1] / Lmin + .5)
	windowLen = Lmax - length + 1

	p0 = np.mean(X)  # fraction of entries that are 1 (roughly)

	colSims = np.dot(X.T, Xblur)
	# identity kernel: each output entry is the sum of a diagonal run of
	# windowLen entries of colSims
	diagKernel = np.eye(windowLen)
	windowSims = sig.convolve2d(colSims, diagKernel, mode='valid')

	windowVects = vectorizeWindowLocs(X, windowLen)
	windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

	plt.figure()
	plt.imshow(windowSims, interpolation='nearest', aspect='auto')

	# ------------------------ find stuff

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection
	#
	bsfScore = 0
	bsfLocs = None
	bsfIntersection = None
	for i, row in enumerate(windowSims):
		if i % 20 == 0:
			print("computing stuff for row {}".format(i))
		# early abandon if this location has so little stuff that no
		# intersection with it can possibly beat the best score
		if windowSims[i,i] * kMax <= bsfScore:  # highest score is kMax identical locs
			continue

		# best combination of idxs such that none are within Lmin of each other
		idxs = sub.optimalAlignment(row, Lmin)  # goes past end of ts, but better

		# order idxs by descending order of associated score
		sizes = windowSims[i, idxs]
		sortedSizesOrder = np.argsort(sizes)[::-1]
		sortedIdxs = idxs[sortedSizesOrder]

		# iteratively fold in another near neighbor, compute the associated
		# score, and check if it's better (or if we can early abandon)
		intersection = windowVects[i]
		numIdxs = len(sortedIdxs)
		nextSz = np.sum(intersection)
		nextFilt = np.array(intersection, dtype=float)
		nextFiltSum = np.array(nextFilt, dtype=float)
		for k in range(1, numIdxs + 1):
			filt = np.copy(nextFilt)
			sz = nextSz
			if k < numIdxs:
				nextIdx = sortedIdxs[k]
				nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
				nextFiltSum += nextIntersection
				nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
				# only count entries that clear the similarity cutoff
				nextSz = np.sum(nextIntersection[nextIntersection > minSim])
			else:
				nextSz = sz * p0

			score = (sz - nextSz) * k
			if k > 1 and score > bsfScore:
				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
				print("sortedIdxs = {}".format(str(sortedIdxs)))
				print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
				print("------------------------")
				bsfScore = score
				bsfLocs = sortedIdxs[:k]
				bsfIntersection = np.copy(filt)
			# early abandon if this can't possibly beat the best score, which
			# is the case exactly when the intersection is so small that perfect
			# matches at all future locations still wouldn't be good enough
			elif sz * numIdxs <= bsfScore:
				# TODO can we actually early abandon here? next window loc
				# could increase filt, and thus score for a given loc isn't
				# necessarily non-increasing...
				# 	-can't abandon using this test, but pretty sure there's
				# 	a lower bound to be had here somewhere
				break

	# nothing beat the initial score of 0: there is no intersection to
	# analyze, so show what has been drawn so far and stop
	if bsfLocs is None:
		print("no repeated pattern found")
		plt.show()
		return

	# ------------------------ recover original ts

	bsfIntersection *= bsfIntersection >= minSim
	bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
	sums = np.sum(bsfIntersectionWindow, axis=0)

	kBest = len(bsfLocs)
	p0 = np.power(p0, kBest)
	expectedOnesPerCol = p0 * X.shape[1] * 2
	sums -= expectedOnesPerCol

	# NOTE(review): no new figure is created here, so this presumably draws
	# onto the windowSims figure above -- confirm that is intended
	plt.plot(sums)

	start, end, _ = maxSubarray(sums)
	patStart, patEnd = start, end + 1 + length

	# ================================ show output

	print("bestScore = {}".format(bsfScore))
	print("bestLocations = {}".format(str(bsfLocs)))

	for idx in bsfLocs:
		viz.plotRect(axSim, idx, idx+windowLen)

	for idx in bsfLocs:
		viz.plotRect(axSeq, idx + patStart, idx + patEnd)

	plt.figure()
	plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

	plt.tight_layout()
	plt.show()

Example #6

Show file

def main():
    """Search a sequence for a repeated pattern using sparse features.

    Pipeline, in order:
      1. build a synthetic random-walk sequence with embedded examples
      2. if ``USE_MSRC`` is set, replace it with one downsampled MSRC
         recording, padded on both ends with copies of its first / last row
      3. build a feature matrix by stacking the outputs of
         ``rep.multiNormalizeAndSparseQuantize`` and a binarized
         ``rep.multiSparseLineProject``, then weight each row by
         ``-log2`` of its mean
      4. for every window position, pick candidate matching windows via
         ``sub.optimalAlignment`` and keep the best-scoring set found
      5. estimate the pattern extent within the window with
         ``maxSubarray`` and draw the results

    Takes no arguments and returns None. Output is printed text and
    matplotlib figures. If no candidate set beats a score of 0, a message
    is printed and only the figures built so far are shown.
    """
    # ================================ consts for everything
    # consts for generating data
    n = 500
    exampleLengths = [55, 60, 65]
    noiseStd = .5

    # consts for algorithm
    Lmin = 20  # min spacing handed to sub.optimalAlignment()
    Lmax = 100  # loose upper bound on pattern length
    length = Lmin // 2

    answerIdxs = None

    USE_MSRC = True

    # ================================ data

    # ------------------------ synthetic data

    seq = synth.randwalk(n, std=noiseStd)
    seq = embedExamples(seq, exampleLengths)

    # ------------------------ msrc

    if USE_MSRC:
        from ..datasets import read_msrc as msrc
        idxs = [10]  # something crazy about feature rep here # TODO fix
        downsampleBy = 2
        recordings = msrc.getRecordings(idxs=idxs)
        r = list(recordings)[0]
        seq = r.data[:, 24:27]
        print("orig seq shape {}".format(seq.shape))
        seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
        print("downsampled seq shape {}".format(seq.shape))
        Lmin = len(seq) // 20
        Lmax = len(seq) // 10
        length = Lmin // 2
        answerIdxs = r.gestureIdxs / downsampleBy

        # pad with fixed values to allow all window positions
        # TODO pad simMat with zeros instead--this introduces fake subseqs
        prePadLen = Lmax - length
        postPadLen = Lmax - length
        first = np.tile(seq[0], (prePadLen, 1))
        last = np.tile(seq[-1], (postPadLen, 1))
        seq = np.vstack((first, seq, last))
        answerIdxs += prePadLen  # keep annotations aligned with padded seq

    # ================================ feature construction

    # powers of two from 2**floor(log2(Lmin)) through 2**floor(log2(Lmax))
    logMaxLength = int(np.floor(np.log2(Lmax)))
    logMinLength = int(np.floor(np.log2(Lmin)))
    lengths = 2 ** np.arange(logMinLength, logMaxLength + 1)

    cardinality = 8
    breakpoints = rep.saxBreakpoints(cardinality)

    X = rep.multiNormalizeAndSparseQuantize(seq, lengths, breakpoints)

    lengths2 = lengths  # TODO uncomment after debug

    breakpoints2 = rep.defaultSparseLineBreakpoints(seq)
    X2 = rep.multiSparseLineProject(seq, lengths2, breakpoints2)
    X2 = X2 > 0.  # ignore correlations

    X = np.vstack((X, X2))

    X = localMaxFilterSimMat(X)
    featureMeans = np.mean(X, axis=1).reshape((-1, 1))
    # variable encoding costs for rows
    # NOTE(review): a row whose mean is 0 would give -log2(0) = inf and
    # turn that row into NaNs -- confirm such rows cannot reach this point
    X *= -np.log2(featureMeans)
    Xblur = filterSimMat(X, length - 1, 'hamming', scaleFilterMethod='max1')

    print("featureMat dims: {}".format(X.shape))
    Xnonzeros = np.count_nonzero(X)
    print("featureMat nonzeros, total, frac =  {} {} {}".format(
        Xnonzeros, X.size, Xnonzeros / float(X.size)))

    # ================================ plotting

    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    padLen = len(seq) - X.shape[1]
    Xpad = synth.appendZeros(X, padLen)
    axSim.imshow(Xpad, interpolation='nearest', aspect='auto')

    axSeq.set_title("Time Series")
    axSim.set_title("Feature Matrix")

    # ================================ science

    # ------------------------ derived stats
    # NOTE(review): the +.5 only rounds under true division; with integer
    # operands on Python 2 the quotient is already floored -- confirm intent
    kMax = int(X.shape[1] / Lmin + .5)
    windowLen = Lmax - length + 1

    p0 = np.mean(X)  # fraction of entries that are 1 (roughly)
    minSim = p0
    expectedOnesPerWindow = p0 * X.shape[0] * windowLen
    noiseSz = p0 * expectedOnesPerWindow  # num ones to begin with

    colSims = np.dot(X.T, Xblur)
    # identity kernel: each output entry is the sum of a diagonal run of
    # windowLen entries of colSims
    diagKernel = np.eye(windowLen)
    windowSims = sig.convolve2d(colSims, diagKernel, mode='valid')

    windowVects = vectorizeWindowLocs(X, windowLen)
    windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

    # ------------------------ find stuff

    #
    # Version where we look for similarities to orig seq and use nearest
    # enemy dist as M0, and use mean values instead of intersection
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print("computing stuff for row {}".format(i))
        # early abandon if this location has so little stuff that no
        # intersection with it can possibly beat the best score
        if windowSims[i, i] * kMax <= bsfScore:  # highest score is kMax identical locs
            continue

        # best combination of idxs such that none are within Lmin of each other
        idxs = sub.optimalAlignment(row, Lmin)  # goes past end of ts, but better

        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]

        # iteratively fold in another near neighbor, compute the associated
        # score, and check if it's better (or if we can early abandon)
        intersection = windowVects[i]
        numIdxs = len(sortedIdxs)
        nextSz = np.sum(intersection)
        nextFilt = np.array(intersection, dtype=float)
        nextFiltSum = np.array(nextFilt, dtype=float)
        for k in range(1, numIdxs + 1):
            filt = np.copy(nextFilt)
            sz = nextSz
            if k < numIdxs:
                nextIdx = sortedIdxs[k]
                nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
                nextFiltSum += nextIntersection
                # avg value of each feature in intersections
                nextFilt = nextFiltSum / (k + 1)
                # only count entries that clear the similarity cutoff
                nextSz = np.sum(nextIntersection[nextIntersection > minSim])
            else:
                nextSz = sz * p0
            # the "enemy" is never allowed to be smaller than the noise floor
            enemySz = max(nextSz, noiseSz)

            score = (sz - enemySz) * k
            if k > 1 and score > bsfScore:
                print("window {0}, k={1}, score={2} is the new best!".format(
                    i, k, score))
                print("sortedIdxs = {}".format(str(sortedIdxs)))
                print("sortedIdxScores = {}".format(
                    str(windowSims[i, sortedIdxs])))
                print("------------------------")
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(filt)
            # early abandon if this can't possibly beat the best score, which
            # is the case exactly when the intersection is so small that perfect
            # matches at all future locations still wouldn't be good enough
            elif sz * numIdxs <= bsfScore:
                # TODO can we actually early abandon here? next window loc
                # could increase filt, and thus score for a given loc isn't
                # necessarily non-increasing...
                # 	-can't abandon using this test, but pretty sure there's
                # 	a lower bound to be had here somewhere
                break
            # stop once the next intersection has dropped below the noise floor
            elif noiseSz > nextSz:
                break

    # nothing beat the initial score of 0: there is no intersection to
    # analyze, so show what has been drawn so far and stop
    if bsfLocs is None:
        print("no repeated pattern found")
        plt.show()
        return

    # ------------------------ recover original ts

    bsfIntersection *= bsfIntersection >= minSim
    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
    sums = np.sum(bsfIntersectionWindow, axis=0)

    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    expectedOnesPerCol = p0 * X.shape[0]
    sums -= expectedOnesPerCol

    start, end, _ = maxSubarray(sums)
    patStart, patEnd = start, end + 1

    # ================================ show output

    print("bestScore = {}".format(bsfScore))
    print("bestLocations = {}".format(str(bsfLocs)))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowLen)

    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

    if answerIdxs is not None:
        for idx in answerIdxs:
            viz.plotVertLine(idx, ax=axSeq)

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

    plt.tight_layout()
    plt.show()