예제 #1
0
    def objective(std_noise):
        """Return the mean-to-max ratio of the profile computed for std_noise.

        Args:
            std_noise - value forwarded to ``mp.stomp`` as ``std_noise``

        Return:
            ``np.inf`` when the largest profile value equals 0, otherwise
            ``np.mean(profile) / np.max(profile)``
        """
        # sequence and window_size are not defined here; they are resolved
        # from an outer scope.
        stomp_result = mp.stomp(sequence, window_size, std_noise=std_noise)
        profile = stomp_result[0]
        peak = np.max(profile)

        # A zero peak would make the ratio undefined, so report infinity.
        if peak != 0:
            return np.mean(profile) / peak
        return np.inf
def compute_mp(ts, window, threshold=None):
    """
    Build the matrix profile of ts, zero-padded to the length of ts

    The profile returned by matrixProfile.stomp is padded with zeros on both
    sides so that each value is assigned to the middle of its window, and
    every index where compute_std reports a zero standard deviation is set
    to 0.

    Args:
        ts - array containing time series data
        window - window length
        threshold - optional outlier cutoff; when given, the fraction of
            profile values above it is printed

    Return:
        numpy array - padded matrix profile
    """
    # NOTE: trailing NaNs of ts are deliberately not trimmed in this version.

    half_window = window // 2

    # The first element of the stomp result is the profile itself.
    raw_profile = np.array(matrixProfile.stomp(ts, m=window))[0]

    # Zero padding that moves each profile value to the middle of its window.
    pad_front = np.zeros(half_window - 1)
    pad_back = np.zeros(len(ts) - len(raw_profile) - half_window + 1)
    profile = np.concatenate([pad_front, raw_profile, pad_back])

    # Zero-std positions give erroneous results; force them to 0 so they
    # cannot be counted as outliers, and remember how many there were.
    zero_std_total = 0
    for position, std_value in enumerate(compute_std(ts, window=window)):
        if std_value == 0:
            profile[position] = 0
            zero_std_total += 1

    # Padding and zero-std points are excluded from the outlier denominator.
    if threshold is not None:
        above_threshold = profile[profile > threshold]
        eligible = (len(profile) - len(pad_front) - len(pad_back)
                    - zero_std_total)
        print('outlier %: ' + str(len(above_threshold) / eligible))

    return profile
예제 #3
0
    def test_extract_regimes(self):
        """Check extract_regimes on the example data, with and without num."""
        window = 32
        csv_path = os.path.join(
            MODULE_PATH, '..', 'docs', 'examples', 'rawdata.csv')
        series = np.loadtxt(csv_path, skiprows=1)

        # Only the second stomp output is needed as input to fluss.
        _, profile_index = stomp(series, window)
        arc_curve = fluss(profile_index, window)

        # Call without num: three regimes are expected.
        np.testing.assert_array_equal(
            extract_regimes(arc_curve, window),
            np.array([759, 423, 583]))

        # Explicitly request two regimes.
        np.testing.assert_array_equal(
            extract_regimes(arc_curve, window, num=2),
            np.array([759, 423]))
예제 #4
0
def compute_mp(ts, window, threshold):
    """
    Compute the zero-padded matrix profile of ts and its outlier fraction

    Trailing NaNs are stripped from ts first. The profile returned by
    matrixProfile.stomp is then padded with zeros on both sides so each
    value is assigned to the middle of its window, and every index where
    compute_std reports a zero standard deviation is set to 0.

    Args:
        ts - array containing time series data
        window - window length
        threshold - profile values strictly above this count as outliers

    Return:
        tuple (mp, outlier_percentage)
        mp - numpy array, padded matrix profile of the trimmed series
        outlier_percentage - number of outliers divided by the number of
            points that are neither padding nor zero-std; NaN when that
            number is 0
    """
    # Remove trailing NaNs of ts. The bounds check comes first so the loop
    # never indexes ts with a negative (wrapped-around) position and an
    # empty series does not raise IndexError here.
    last_valid = len(ts) - 1
    while last_valid >= 0 and np.isnan(ts[last_valid]):
        last_valid -= 1
    # last_valid is the index of the final non-NaN sample, so the slice end
    # must be last_valid + 1 to keep that sample.
    ts = ts[0:last_valid + 1]

    # Compute the profile; the first element of the stomp result is the
    # profile itself.
    mp = np.array(matrixProfile.stomp(ts, m=window))[0]

    # Calibrate ts and mp, so each mp value is assigned to the middle of
    # its window.
    mp_head = np.zeros(window // 2 - 1)
    mp_tail = np.zeros(len(ts) - len(mp) - window // 2 + 1)
    mp = np.concatenate([mp_head, mp, mp_tail])

    # Remove error results due to zero std (make them 0 so they don't
    # contribute to outliers).
    count_zero_std = 0
    for position, std_value in enumerate(compute_std(ts, window=window)):
        if std_value == 0:
            mp[position] = 0
            count_zero_std += 1

    # Fraction of outliers; head, tail and zero-std points do not
    # participate in the denominator.
    outlier = mp[mp > threshold]
    eligible = len(mp) - len(mp_head) - len(mp_tail) - count_zero_std
    if eligible == 0:
        # Nothing to rate: report NaN instead of raising ZeroDivisionError.
        outlier_percentage = float('nan')
    else:
        outlier_percentage = len(outlier) / eligible
    return mp, outlier_percentage
예제 #5
0
 def matrixprofile(self, col, motiflen):
     """Pass the stomp profile of the selected data to self.scatter.

     Args:
         col - column selector; converted with castlist before indexing
             self.data
         motiflen - window length handed to matrixProfile.stomp

     Return:
         whatever self.scatter returns
     """
     # Flatten the selected values into a single 1-D series for stomp.
     series = self.data[castlist(col)].values.flatten()
     # The first element of the stomp result is used as the y data.
     profile = matrixProfile.stomp(series, motiflen)[0]
     label = 'matrixprofile (len {})'.format(motiflen)
     return self.scatter(y = profile, name = label)
# Alternative (Linux) location of the matrixprofile-ts checkout:
#doc_path = '/home/bhossein/BMBF project/code_resources/matrixprofile-ts-master/'
# Raw string: the backslashes are literal Windows path separators, not
# escape sequences ('\H', '\c' and '\m' are invalid escapes in a normal
# string literal).
doc_path = r'C:\Hinkelstien\code_resources\matrixprofile-ts-master/'

# Load the example series; the values live in the CSV's "data" column.
data = pd.read_csv(doc_path + 'docs/examples/rawdata.csv')
pattern = data.data.values

#Plot data
fig, ax1 = plt.subplots(figsize=(20, 10))
ax1.plot(np.arange(len(pattern)), pattern, label="Synthetic Data")
legend = ax1.legend(loc='upper right')

#%% Calculate the Matrix Profile

m = 32
# mp[0] is plotted below as the matrix profile; mp[1] is the input to fluss.
mp = matrixProfile.stomp(pattern, m)

# Bonus: calculate the corrected arc curve (CAC) to do semantic segmentation.
cac = fluss.fluss(mp[1], m)

#Append np.nan to Matrix profile to enable plotting against raw data
mp_adj = np.append(mp[0], np.full(m - 1, np.nan))

#Plot the signal data
# NOTE: fig and ax1 are rebound here to the new three-panel figure.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, figsize=(20, 10))
ax1.plot(np.arange(len(pattern)), pattern, label="Synthetic Data")
ax1.set_ylabel('Signal', size=22)

#Plot the Matrix Profile
ax2.plot(np.arange(len(mp_adj)), mp_adj, label="Matrix Profile", color='red')
ax2.set_ylabel('Matrix Profile', size=22)