コード例 #1
0
ファイル: feature.py プロジェクト: dblalock/dig
def _neighborSims1D(seq, length, numNeighbors=100, maxDist=.25):
	"""
	Computes the similarity of every length-`length` subsequence of `seq`
	to a random sample of "neighbor" subsequences drawn from `seq` itself.

	Neighbors are sampled without replacement, weighted by the scores
	returned by `windowScoresRandWalk`. If those scores cannot be turned
	into a probability distribution (they sum to zero, e.g. because
	`length` < 4, or the sum is not finite), neighbors are sampled
	uniformly instead.

	Parameters
	----------
	seq : array
		The time series; it is flattened to 1D before use.
	length : int
		The length of the subsequences.
	numNeighbors : int, optional
		How many neighbors to sample. Values < 1 or larger than the number
		of subsequences are replaced by the number of subsequences.
	maxDist : float, optional
		Largest normalized squared distance that still counts as similar;
		pairs farther apart than this get a similarity of 0.

	Returns
	-------
	sims : 2D array, shape (numNeighbors, numSubseqs)
		`sims[i, j]` is 1 minus the normalized squared distance between
		neighbor `i` and subsequence `j`, or 0 if that distance exceeds
		`maxDist`. Rows belonging to (nearly) flat neighbors are all zeros.
	"""
	seq = seq.flatten()
	X = ar.sliding_window_1D(seq, length)
	numSubseqs = X.shape[0]

	if numNeighbors < 1 or numNeighbors > numSubseqs:
		numNeighbors = numSubseqs

	# sampling weights; dividing by a zero or non-finite sum would fill the
	# weights with NaNs, so fall back to a uniform distribution in that case
	probs = windowScoresRandWalk(seq, length)
	total = np.sum(probs)
	if np.isfinite(total) and total > 0:
		probs = probs / total
	else:
		probs = np.ones(numSubseqs) / max(numSubseqs, 1)

	# select random subseqs
	allIdxs = np.arange(numSubseqs)
	startIdxs = _randChoice(allIdxs, numNeighbors, replace=False, p=probs)
	neighbors = X[startIdxs]

	# mean normalize all subseqs
	X = X - np.mean(X, axis=1, keepdims=True)
	neighbors = neighbors - np.mean(neighbors, axis=1, keepdims=True)

	# compute similarity to each shape; rows of skipped (flat) neighbors stay
	# zero so the output shape is always (numNeighbors, numSubseqs)
	sims = np.zeros((numNeighbors, numSubseqs))
	for i, neighbor in enumerate(neighbors):
		variance = np.var(neighbor)
		if variance < ar.DEFAULT_NONZERO_THRESH: # ignore flat neighbors
			continue

		# compute squared dists; would be within [0, 4] if znormed
		diffs = X - neighbor
		dists = np.sum(diffs * diffs, axis=1) / (length * variance)

		sims[i] = (1. - dists) * (dists <= maxDist) # zero out dists > maxDist

	return sims
コード例 #2
0
def windowScoresRandWalk(seq, length, std=-1, numRandWalks=100):
    """
    Computes the Structure Score for each subsequence of length `length`
    within `seq`.

    Each subsequence is mean-normalized and compared against
    `numRandWalks` random walks; its raw score is the smallest mean
    squared difference to any of the walks.

    Parameters
    ----------
    seq : 1D array
        1D time series (or a single dimension of a multidimensional time
        series)
    length : int > 0
        The length of subsequences. Each subsequence of this length is
        assigned a score. Lengths < 4 are treated as meaninglessly short
        and yield all-zero scores.
    std : float, optional
        The standard deviation to use when creating random walks. Defaults
        to the standard deviation of the first discrete derivative of `seq`.
    numRandWalks: int > 0, optional
        The number of random walk sequences to use to compute the score.
        Scores do not appear to become more meaningful for numbers > 100.

    Returns
    -------
    scores : 1D array
        The score for each subsequence of `seq`. This array is scaled by
        its maximum value, so that the maximum value is always 1, unless
        every raw score is 0, in which case all zeros are returned. The
        array is empty when `seq` is too short to contain any subsequence
        of length `length`.
    """
    seq = np.asarray(seq)
    numSubseqs = len(seq) - length + 1

    if length < 4:  # length < 4 is meaninglessly short
        # if m <= 0, n - m + 1 is > n, which makes us return too long an array
        numSubseqs = numSubseqs if length > 0 else len(seq)
        # a sequence shorter than `length` has no subsequences; clamp so
        # np.zeros never receives a negative size
        return np.zeros(max(numSubseqs, 0))

    if numSubseqs < 1:  # seq is shorter than a single window
        return np.zeros(0)

    if not std > 0:
        std = np.std(seq[1:] - seq[:-1])  # std of discrete deriv

    walks = _createRandWalks(numRandWalks, length, std)

    windowScores = np.zeros(numSubseqs)
    subseqs = ar.sliding_window_1D(seq, length)

    for i, subseq in enumerate(subseqs):
        diffs = walks - (subseq - np.mean(subseq))
        dists = np.sum(diffs * diffs, axis=1) / length
        windowScores[i] = np.min(dists)

    maxScore = np.max(windowScores)
    if maxScore == 0:
        # every window matched a walk exactly; dividing would give 0/0 = NaN
        return windowScores

    return windowScores / maxScore  # normalize to max score of 1
コード例 #3
0
ファイル: feature.py プロジェクト: dblalock/dig
def windowScoresRandWalk(seq, length, std=-1, numRandWalks=100):
	"""
	Computes the Structure Score for each subsequence of length `length`
	within `seq`.

	Each subsequence is mean-normalized and compared against
	`numRandWalks` random walks; its raw score is the smallest mean
	squared difference to any of the walks.

	Parameters
	----------
	seq : 1D array
		1D time series (or a single dimension of a multidimensional time
		series)
	length : int > 0
		The length of subsequences. Each subsequence of this length is
		assigned a score. Lengths < 4 are treated as meaninglessly short
		and yield all-zero scores.
	std : float, optional
		The standard deviation to use when creating random walks. Defaults
		to the standard deviation of the first discrete derivative of `seq`.
	numRandWalks: int > 0, optional
		The number of random walk sequences to use to compute the score.
		Scores do not appear to become more meaningful for numbers > 100.

	Returns
	-------
	scores : 1D array
		The score for each subsequence of `seq`. This array is scaled by
		its maximum value, so that the maximum value is always 1, unless
		every raw score is 0, in which case all zeros are returned. The
		array is empty when `seq` is too short to contain any subsequence
		of length `length`.
	"""
	seq = np.asarray(seq)
	numSubseqs = len(seq) - length + 1

	if length < 4: # length < 4 is meaninglessly short
		# if m <= 0, n - m + 1 is > n, which makes us return too long an array
		numSubseqs = numSubseqs if length > 0 else len(seq)
		# a sequence shorter than `length` has no subsequences; clamp so
		# np.zeros never receives a negative size
		return np.zeros(max(numSubseqs, 0))

	if numSubseqs < 1: # seq is shorter than a single window
		return np.zeros(0)

	if not std > 0:
		std = np.std(seq[1:] - seq[:-1]) # std of discrete deriv

	walks = _createRandWalks(numRandWalks, length, std)

	windowScores = np.zeros(numSubseqs)
	subseqs = ar.sliding_window_1D(seq, length)

	for i, subseq in enumerate(subseqs):
		diffs = walks - (subseq - np.mean(subseq))
		dists = np.sum(diffs * diffs, axis=1) / length
		windowScores[i] = np.min(dists)

	maxScore = np.max(windowScores)
	if maxScore == 0:
		# every window matched a walk exactly; dividing would give 0/0 = NaN
		return windowScores

	return windowScores / maxScore # normalize to max score of 1
コード例 #4
0
def _neighborSims1D(seq, length, numNeighbors=100, maxDist=.25):
    """
    Computes the similarity of every length-`length` subsequence of `seq`
    to a random sample of "neighbor" subsequences drawn from `seq` itself.

    Neighbors are sampled without replacement, weighted by the scores
    returned by `windowScoresRandWalk`. If those scores cannot be turned
    into a probability distribution (they sum to zero, e.g. because
    `length` < 4, or the sum is not finite), neighbors are sampled
    uniformly instead.

    Parameters
    ----------
    seq : array
        The time series; it is flattened to 1D before use.
    length : int
        The length of the subsequences.
    numNeighbors : int, optional
        How many neighbors to sample. Values < 1 or larger than the number
        of subsequences are replaced by the number of subsequences.
    maxDist : float, optional
        Largest normalized squared distance that still counts as similar;
        pairs farther apart than this get a similarity of 0.

    Returns
    -------
    sims : 2D array, shape (numNeighbors, numSubseqs)
        `sims[i, j]` is 1 minus the normalized squared distance between
        neighbor `i` and subsequence `j`, or 0 if that distance exceeds
        `maxDist`. Rows belonging to (nearly) flat neighbors are all zeros.
    """
    seq = seq.flatten()
    X = ar.sliding_window_1D(seq, length)
    numSubseqs = X.shape[0]

    if numNeighbors < 1 or numNeighbors > numSubseqs:
        numNeighbors = numSubseqs

    # sampling weights; dividing by a zero or non-finite sum would fill the
    # weights with NaNs, so fall back to a uniform distribution in that case
    probs = windowScoresRandWalk(seq, length)
    total = np.sum(probs)
    if np.isfinite(total) and total > 0:
        probs = probs / total
    else:
        probs = np.ones(numSubseqs) / max(numSubseqs, 1)

    # select random subseqs
    allIdxs = np.arange(numSubseqs)
    startIdxs = _randChoice(allIdxs, numNeighbors, replace=False, p=probs)
    neighbors = X[startIdxs]

    # mean normalize all subseqs
    X = X - np.mean(X, axis=1, keepdims=True)
    neighbors = neighbors - np.mean(neighbors, axis=1, keepdims=True)

    # compute similarity to each shape; rows of skipped (flat) neighbors stay
    # zero so the output shape is always (numNeighbors, numSubseqs)
    sims = np.zeros((numNeighbors, numSubseqs))
    for i, neighbor in enumerate(neighbors):
        variance = np.var(neighbor)
        if variance < ar.DEFAULT_NONZERO_THRESH:  # ignore flat neighbors
            continue

        # compute squared dists; would be within [0, 4] if znormed
        diffs = X - neighbor
        dists = np.sum(diffs * diffs, axis=1) / (length * variance)

        sims[i] = (1. - dists) * (dists <= maxDist)  # zero out dists > maxDist

    return sims