Example 1
def discretize(dataset,
               filter='L_T1',
               readings_per_letter=1,
               alphabet_size=3,
               sliding=0,
               move=1,
               plot=0):
    column = dataset[filter]
    s = SAX(len(column) // readings_per_letter, alphabet_size)  # word length: one symbol per readings_per_letter samples

    # get the letter representation of the data
    if sliding:
        (xString, xIndices) = s.sliding_window(
            column, sliding, move)  # get letter rep for sliding windows
    else:
        (xString, xIndices) = s.to_letter_rep(column[1:(1 + len(column))])

    # construct a column with letter representations
    sax = []
    for i in range(0, len(xString)):
        for x in range(0, readings_per_letter):
            sax.insert((i * x) + 1, int(xString[i]))

    if plot:
        # plot the original and the letter represented values
        fig, ax1 = pyplot.subplots()
        ax1.plot(column[1:(1 + len(column))])
        ax2 = ax1.twinx()
        ax2.plot(sax, 'r.')
        pyplot.show()

    return xString
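A minimal usage sketch for discretize above. It assumes the SAX class used throughout these examples is importable in the same module and that the data frame carries an 'L_T1' sensor column; the series values below are purely illustrative.

import numpy as np
import pandas as pd

# toy tank-level signal with 200 samples
dataset = pd.DataFrame({'L_T1': np.sin(np.linspace(0, 20, 200)) + 3.0})

# one SAX letter per 4 raw readings, 3-symbol alphabet, no sliding window, no plot
letters = discretize(dataset, filter='L_T1', readings_per_letter=4,
                     alphabet_size=3, sliding=0, plot=0)
# a string over the chosen alphabet; note that discretize() casts each symbol
# with int(), so it expects a SAX implementation whose symbols are digit characters
print(letters)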
Example 2
 def __init__(self, N=800, symbols_per_word=4, alphabet="abcd"):
     AnomalyDetector.__init__(self)
     self.N = N
     self.symbols_per_word = symbols_per_word
     self.data_buffer = []
     self.apply_count = N
     self.sax = SAX()
     self.alphabet = alphabet
Example 3
    def dist(self, p, q):
        # normalized Euclidean distance between the subsequences
        # self.timeseries[p[0]:p[1]] and self.timeseries[q[0]:q[1]];
        # the longer one is first reduced with PAA so both have equal length
        self.distCalls += 1
        s = SAX(wordSize=min(len(self.timeseries[p[0]:p[1]]),
                             len(self.timeseries[q[0]:q[1]])))
        #normp = s.normalize(self.timeseries[p[0]:p[1]])
        #normq = s.normalize(self.timeseries[q[0]:q[1]])
        normp = []
        normq = []
        start = time.time()
        if len(self.timeseries[p[0]:p[1]]) > len(self.timeseries[q[0]:q[1]]):
            normp = s.normalize(s.to_PAA(self.timeseries[p[0]:p[1]])[0])
            normq = s.normalize(self.timeseries[q[0]:q[1]])
        elif len(self.timeseries[q[0]:q[1]]) > len(self.timeseries[p[0]:p[1]]):
            normq = s.normalize(s.to_PAA(self.timeseries[q[0]:q[1]])[0])
            normp = s.normalize(self.timeseries[p[0]:p[1]])
        else:
            normp = s.normalize(self.timeseries[p[0]:p[1]])
            normq = s.normalize(self.timeseries[q[0]:q[1]])

        sqval = 0.0
        for a, b in zip(normp, normq):
            sqval += (a - b)**2
        sqval = math.sqrt(sqval)

        dist = sqval / float(len(normp))
        end = time.time()
        self.totalDistTime += (end - start)
        return dist
Example 4
def main():
    train1_df = pd.read_csv('BATADAL_dataset03.csv', index_col='DATETIME')
    train2_df = pd.read_csv('BATADAL_dataset04.csv', index_col=0)
    train1_df.index = pd.to_datetime(train1_df.index, dayfirst=True)
    train2_df['n_gram'] = np.zeros(len(train2_df))
    for col in ['L_T1', 'F_PU11', 'S_PU6']:
        window_size = 10
        word_size = 3
        alphabet_size = 3
        stride = 1
        sax = SAX(wordSize=word_size, alphabetSize=alphabet_size)

        # for column_name in train1_df:
        train_string_rep, train_window_indices = sax.sliding_window(
            train1_df[col].values, cover=window_size, stride=stride)
        train_string_rep2, train_window_indices2 = sax.sliding_window(
            train2_df[col].values, cover=window_size, stride=stride)

        threshold = 1e-6

        model = n_gram_model(train_string_rep)
        anomalies, probabilities = n_gram_predict(model, train_string_rep2,
                                                  train_window_indices2,
                                                  threshold, window_size)
        print('window: {}, word: {}, alphabet: {}, threshold: {}'.format(
            window_size, word_size, alphabet_size, threshold))

        train2_df['ATT_FLAG_anom'] = np.where(train2_df['ATT_FLAG'] == 1, 100,
                                              0)
        train2_df['n_gram'] += probabilities

    train2_df['n_gram'] = np.where(train2_df['n_gram'] > 0, 1, 0)

    train2_df['diff'] = train2_df['ATT_FLAG_anom'] - train2_df['n_gram']

    arr = train2_df['diff'].value_counts()

    TTD = utils.TDD_metric(train2_df, probabilities)
    TP = arr[99]
    FP = arr[-1]
    TN = arr[0]
    FN = arr[100]
    S_CM = utils.S_cm(TP, FP, TN, FN)

    accuracy = (TP + TN) / len(train2_df)
    precision = TP / (TP + FP)

    print('accuracy: {}, precision: {}'.format(accuracy, precision))
    print('TDD: {}'.format(TTD))
    print('S_cm: {}'.format(S_CM))
    print('Ranked: {}'.format(0.5 * TTD + 0.5 * S_CM))
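The helpers n_gram_model and n_gram_predict are not shown in this example. Below is a hypothetical reimplementation, not the project's actual code: a bigram frequency table over the training SAX words, with windows whose rarest n-gram falls below the threshold flagged row by row. It assumes each entry of the window-index list is the integer start offset of its window (it may equally be a (start, end) pair in the actual SAX implementation), and that the real n_gram_predict returns one value per row of train2_df, since its output is added to a frame column.

from collections import Counter
import numpy as np

def n_gram_model(strings, n=2):
    # relative frequency of every length-n substring seen in the training SAX words
    counts = Counter(s[i:i + n] for s in strings for i in range(len(s) - n + 1))
    total = float(sum(counts.values()))
    return {g: c / total for g, c in counts.items()}

def n_gram_predict(model, strings, indices, threshold, window_size, n=2):
    # hypothetical behaviour: flag every row covered by a window whose rarest
    # n-gram has probability below the threshold
    flags = np.zeros(indices[-1] + window_size)
    anomalies = []
    for s, start in zip(strings, indices):
        grams = [s[i:i + n] for i in range(len(s) - n + 1)]
        p = min(model.get(g, 0.0) for g in grams) if grams else 0.0
        if p < threshold:
            anomalies.append(start)
            flags[start:start + window_size] = 1
    return anomalies, flags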
Example 5
class TSBitmaps(AnomalyDetector):
    def __init__(self, lag_window=8, lead_window=4, anomaly_threshold=0.5, N=1600, n=400, alphabet="abcd", bitmap_level=2):
        super(TSBitmaps, self).__init__()
        self.alphabet = alphabet
        self.lagging_tsb = TimeseriesBitmap(self.alphabet, bitmap_level, lag_window)
        self.leading_tsb = TimeseriesBitmap(self.alphabet, bitmap_level, lead_window)
        self.sax = SAX()
        self.N = N # size of the feature window
        #self.n = n # size of the symbol section
        self.symbols_per_word = self.N // n  # whole number of symbols per feature window
        self.data_buffer = []
        self.word_buffer = []
        self.anomaly_threshold = anomaly_threshold
            
    def apply(self, x):
        self.data_buffer.append(x)
        if len(self.data_buffer) == self.N:            
            self.word_buffer.append(self.sax.convert(self.data_buffer, self.alphabet, self.symbols_per_word))
            del self.data_buffer[0]
            if len(self.word_buffer) == self.leading_tsb.window_size:                    
                self.lagging_tsb.update(self.word_buffer[0])
                self.leading_tsb.update(self.word_buffer[-1])
                del self.word_buffer[0]
                return self.lagging_tsb.getAnomalyScore(self.leading_tsb)
        return 0
    
    
    def detect(self, x, spirit_weights=None):
        anomaly_level = self.apply(x)
        if anomaly_level > self.anomaly_threshold:
            return Anomaly(anomaly_level, spirit_weights)
        return None
Example 6
def HOTSAX(T, n, w, a, num_discords=1):
    # HOT SAX heuristic discord search: n = subsequence length, w = SAX word
    # length, a = alphabet size. Only the single best discord is returned, so
    # num_discords is currently unused.
    ED_counter = 0
    T_sax = SAX(T, n, w, a)  # one SAX word per length-n sliding window of T
    words, counts = np.unique(T_sax, return_counts=True)

    best_so_far_dist = 0
    best_so_far_loc = None
    l = len(T)

    # outer loop: visit candidate positions ordered from rarest SAX word to most common
    outer = np.concatenate(
        [np.where(T_sax == words[i])[0] for i in np.argsort(counts)])
    for p in outer:
        nearest_neighbour_dist = np.inf
        # inner loop: positions sharing p's SAX word first, then a random permutation
        inner = np.concatenate((np.where(T_sax == T_sax[p])[0],
                                np.random.choice(l - n,
                                                 size=l - n,
                                                 replace=False)))
        for q in inner:
            if np.abs(p - q) >= n:  # ignore self-matches (overlapping windows)
                ED_counter += 1
                dist = distance.euclidean(T[p:p + n], T[q:q + n])
                nearest_neighbour_dist = min(dist, nearest_neighbour_dist)
                if nearest_neighbour_dist < best_so_far_dist:
                    break  # early abandoning: p cannot beat the best discord so far
        if nearest_neighbour_dist > best_so_far_dist:
            best_so_far_dist = nearest_neighbour_dist
            best_so_far_loc = p

    return (best_so_far_dist, best_so_far_loc, ED_counter)
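A usage sketch for the discord search above. It assumes SAX(T, n, w, a) returns one SAX word per length-n sliding window (that is how np.unique consumes it above), that distance refers to scipy.spatial.distance, and the series and parameter values are illustrative.

import numpy as np
from scipy.spatial import distance  # relied upon by HOTSAX above

# toy series: a noisy sine wave with a short injected discord
rng = np.random.default_rng(0)
T = np.sin(np.linspace(0, 40, 1000)) + rng.normal(0, 0.05, 1000)
T[500:520] += 2.0  # anomalous bump

# n: subsequence length, w: SAX word length, a: alphabet size
best_dist, loc, calls = HOTSAX(T, n=50, w=5, a=3, num_discords=1)
print('discord at index %d, distance %.3f, %d distance calls' % (loc, best_dist, calls))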
Example 7
 def __init__(self, N=800, symbols_per_word=4, alphabet="abcd"):
     AnomalyDetector.__init__(self)
     self.N = N
     self.symbols_per_word = symbols_per_word
     self.data_buffer = []
     self.apply_count = N
     self.sax = SAX()
     self.alphabet = alphabet        
Example 8
def main(args):
    if len(args) < 2:
        print "Usage: python %s {filename}" % (args[0])
        exit()    
    
    filename = args[1]
        
    print "Reading..."
    data = []
    with open(filename) as f:
        for line in f:
            data.append(float(line.split()[1]))
    print "Read %d lines." % (len(data))
    
    
    print "Converting to SAX..."
    sax = SAX()
    N = 1600 # size of the sliding window
    n = 400 # size of a symbol
    lag_window = 8
    lead_window = 4
    bitmap_level = 2
    alphabet = "abcd"
    chunks = slidingWindow(data, N, step=n)
    words = []
    for chunk in chunks:
        word = sax.convert(chunk, alphabet, n)
        words.append(word)
    
    print "Anomaly detection..."
    with open("bitmap_anomaly.txt", "w") as out:        
        bitmap1 = TimeseriesBitmap(alphabet, bitmap_level, lag_window)
        bitmap2 = TimeseriesBitmap(alphabet, bitmap_level, lead_window)        
        
        for i in xrange(len(words) - (lag_window + lead_window)):
            bitmap1.update(words[i])
            bitmap2.update(words[i + lag_window])            
            if i >= lag_window:
                score = bitmap1.getAnomalyScore(bitmap2)
            else:
                score = 0.0
            
            for _ in xrange(n):    
                out.write("%s\n" % score)
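The slidingWindow helper used above is not defined in this excerpt. A minimal sketch consistent with how it is called, slidingWindow(data, N, step=n), would be a generator of overlapping chunks of length N advancing by step samples:

def slidingWindow(sequence, winSize, step=1):
    # yield consecutive windows of length winSize, advancing by step samples
    for start in range(0, len(sequence) - winSize + 1, step):
        yield sequence[start:start + winSize]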
Example 9
 def __init__(self, lag_window=8, lead_window=4, anomaly_threshold=0.5, N=1600, n=400, alphabet="abcd", bitmap_level=2):
     super(TSBitmaps, self).__init__()
     self.alphabet = alphabet
     self.lagging_tsb = TimeseriesBitmap(self.alphabet, bitmap_level, lag_window)
     self.leading_tsb = TimeseriesBitmap(self.alphabet, bitmap_level, lead_window)
     self.sax = SAX()
     self.N = N # size of the feature window
     #self.n = n # size of the symbol section
     self.symbols_per_word = self.N // n  # whole number of symbols per feature window
     self.data_buffer = []
     self.word_buffer = []
     self.anomaly_threshold = anomaly_threshold
Example 10
 def __init__(self,
              lag_window=8,
              lead_window=4,
              anomaly_threshold=0.5,
              N=1600,
              n=400,
              alphabet="abcd",
              bitmap_level=2):
     super(TSBitmaps, self).__init__()
     self.alphabet = alphabet
     self.lagging_tsb = TimeseriesBitmap(self.alphabet, bitmap_level,
                                         lag_window)
     self.leading_tsb = TimeseriesBitmap(self.alphabet, bitmap_level,
                                         lead_window)
     self.sax = SAX()
     self.N = N  # size of the feature window
     #self.n = n # size of the symbol section
     self.symbols_per_word = self.N // n  # whole number of symbols per feature window
     self.data_buffer = []
     self.word_buffer = []
     self.anomaly_threshold = anomaly_threshold
Example 11
class testSax(unittest.TestCase):
    def setUp(self):
        self.sax = SAX()
        self.delta = 1.0e-10

    def testEuclideanDistance(self):
        sig1 = [i for i in xrange(100)]
        sig2 = [i + 0.5 for i in xrange(100)]
        lse = self.sax.euclidean_dist(sig1, sig2)
        assert lse == 5.0

    def testNormalizeOnRandom(self):
        orig_sig = [random.uniform(0, 1) for _ in xrange(1000)]
        sig = self.sax.normalize(orig_sig)

        # properly Z-normalized signal should have a mean
        # very close to 0 and standard deviation very close to 1.0
        assert abs(np.mean(sig)) < self.delta
        assert abs(np.std(sig) - 1.0) < self.delta

    def testPAA(self):
        siglen = 100
        M = 10
        orig_sig = [random.uniform(0, 1) for _ in xrange(siglen)]
        paa_sig = self.sax.to_PAA(orig_sig, M)

        self.assertEquals(len(paa_sig), M)
        self.assertEquals(np.mean(orig_sig[:M]), paa_sig[0])

    def testPAAexample(self):
        orig_sig = [
            2.02, 2.33, 2.99, 6.85, 9.20, 8.80, 7.50, 6.00, 5.85, 3.85, 4.85,
            3.85, 2.22, 1.45, 1.34
        ]
        M = 9
        paa_sig = self.sax.to_PAA(orig_sig, M)
        res_sig = [
            -0.9327168, -0.3699053, 1.383673, 1.391248, 0.6299752, 0.01641218,
            -0.05933634, -0.8387886, -1.220561
        ]

        assert len(paa_sig) == len(res_sig)

        M = 5
        paa_sig = self.sax.to_PAA(orig_sig, M)
        res_sig2 = [-0.9379922, -0.0857173, 0.4738943, 1.444949, -0.8951336]

        assert len(paa_sig) == len(res_sig2)

    def testPAAZero(self):
        orig_sig = [0] * 20
        paa_sig = self.sax.to_PAA(orig_sig, 5)
        self.assertTrue(not any(paa_sig))
        self.assertEqual(self.sax.convert(orig_sig, "abcd"), "aaaaaaaa")
Example 12
class HOTSAXDetectorBruteForce(AnomalyDetector):
    def __init__(self, N=800, symbols_per_word=4, alphabet="abcd"):
        AnomalyDetector.__init__(self)
        self.N = N
        self.symbols_per_word = symbols_per_word
        self.data_buffer = []
        self.apply_count = N
        self.sax = SAX()
        self.alphabet = alphabet

    def apply(self, x):
        self.data_buffer.append(x)
        if len(self.data_buffer) == self.N:
            word = self.sax.convert(self.data_buffer, self.alphabet,
                                    self.symbols_per_word)
            del self.data_buffer[0]
Example 13
class testSax(unittest.TestCase):
    def setUp(self):
        self.sax = SAX()
        self.delta = 1.0e-10

    def testEuclideanDistance(self):
        sig1 = [ i for i in xrange(100) ]
        sig2 = [ i + 0.5 for i in xrange(100) ]
        lse = self.sax.euclidean_dist(sig1, sig2)
        assert lse == 5.0

    def testNormalizeOnRandom(self):
        orig_sig = [ random.uniform(0, 1) for _ in xrange(1000) ]
        sig = self.sax.normalize(orig_sig)

        # properly Z-normalized signal should have a mean
        # very close to 0 and standard deviation very close to 1.0
        assert abs(np.mean(sig)) < self.delta
        assert abs(np.std(sig) - 1.0) < self.delta

    def testPAA(self):
        siglen = 100
        M = 10
        orig_sig = [ random.uniform(0, 1) for _ in xrange(siglen) ]
        paa_sig = self.sax.to_PAA(orig_sig, M)

        self.assertEquals(len(paa_sig), M)
        self.assertEquals(np.mean(orig_sig[:M]), paa_sig[0])

    def testPAAexample(self):
        orig_sig = [2.02, 2.33, 2.99, 6.85, 9.20, 8.80, 7.50, 6.00, 5.85, 3.85, 4.85, 3.85, 2.22, 1.45, 1.34]
        M = 9
        paa_sig = self.sax.to_PAA(orig_sig, M)
        res_sig = [-0.9327168, -0.3699053, 1.383673, 1.391248, 0.6299752, 0.01641218, -0.05933634, -0.8387886, -1.220561]

        assert len(paa_sig) == len(res_sig)

        M = 5
        paa_sig = self.sax.to_PAA(orig_sig, M)
        res_sig2 = [-0.9379922, -0.0857173, 0.4738943, 1.444949, -0.8951336]

        assert len(paa_sig) == len(res_sig2)
    
    def testPAAZero(self):
        orig_sig = [0] * 20
        paa_sig = self.sax.to_PAA(orig_sig, 5)        
        self.assertTrue(not any(paa_sig))
        self.assertEqual(self.sax.convert(orig_sig, "abcd"), "aaaaaaaa")
Example 14
class HOTSAXDetectorBruteForce(AnomalyDetector):
    def __init__(self, N=800, symbols_per_word=4, alphabet="abcd"):
        AnomalyDetector.__init__(self)
        self.N = N
        self.symbols_per_word = symbols_per_word
        self.data_buffer = []
        self.apply_count = N
        self.sax = SAX()
        self.alphabet = alphabet        
    
    def apply(self, x):    
        self.data_buffer.append(x)
        if len(self.data_buffer) == self.N:
            word = self.sax.convert(self.data_buffer, self.alphabet, self.symbols_per_word)
            del self.data_buffer[0]
            
            
            
Example 15
class TSBitmaps(AnomalyDetector):
    def __init__(self,
                 lag_window=8,
                 lead_window=4,
                 anomaly_threshold=0.5,
                 N=1600,
                 n=400,
                 alphabet="abcd",
                 bitmap_level=2):
        super(TSBitmaps, self).__init__()
        self.alphabet = alphabet
        self.lagging_tsb = TimeseriesBitmap(self.alphabet, bitmap_level,
                                            lag_window)
        self.leading_tsb = TimeseriesBitmap(self.alphabet, bitmap_level,
                                            lead_window)
        self.sax = SAX()
        self.N = N  # size of the feature window
        #self.n = n # size of the symbol section
        self.symbols_per_word = self.N // n  # whole number of symbols per feature window
        self.data_buffer = []
        self.word_buffer = []
        self.anomaly_threshold = anomaly_threshold

    def apply(self, x):
        self.data_buffer.append(x)
        if len(self.data_buffer) == self.N:
            self.word_buffer.append(
                self.sax.convert(self.data_buffer, self.alphabet,
                                 self.symbols_per_word))
            del self.data_buffer[0]
            if len(self.word_buffer) == self.leading_tsb.window_size:
                self.lagging_tsb.update(self.word_buffer[0])
                self.leading_tsb.update(self.word_buffer[-1])
                del self.word_buffer[0]
                return self.lagging_tsb.getAnomalyScore(self.leading_tsb)
        return 0

    def detect(self, x, spirit_weights=None):
        anomaly_level = self.apply(x)
        if anomaly_level > self.anomaly_threshold:
            return Anomaly(anomaly_level, spirit_weights)
        return None
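A hedged driving sketch for the streaming detector above. It assumes TimeseriesBitmap, SAX.convert and Anomaly are importable and behave as they are used inside the class; the stream and parameter values are illustrative.

import numpy as np

detector = TSBitmaps(lag_window=8, lead_window=4, anomaly_threshold=0.5,
                     N=1600, n=400, alphabet="abcd", bitmap_level=2)

rng = np.random.default_rng(1)
stream = np.sin(np.linspace(0, 200, 20000)) + rng.normal(0, 0.05, 20000)
stream[12000:12400] = 0.0  # flatten one region so the leading bitmap diverges

for t, x in enumerate(stream):
    anomaly = detector.detect(x)
    if anomaly is not None:
        print('anomaly reported at sample %d' % t)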
Example 16
 def setUp(self):
     self.sax = SAX()
     self.delta = 1.0e-10
Example 17
def main():
    train1_df = pd.read_csv('BATADAL_dataset03.csv', index_col='DATETIME')
    train2_df = pd.read_csv('BATADAL_dataset04.csv', index_col=0)
    # test_df = pd.read_csv('BATADAL_test_dataset.csv', index_col=0)
    train1_df.index = pd.to_datetime(train1_df.index, dayfirst=True)
    labels = []
    train2_df['n_gram'] = np.zeros(len(train2_df))
    for col in [
            'L_T1', 'F_PU11', 'S_PU6'
    ]:  #'L_T4', 'L_T7', 'S_PU10', 'S_PU11', 'F_PU10', 'F_PU2', 'F_PU6', 'F_PU7 'S_PU2',
        window_size = 10
        word_size = 3
        alphabet_size = 3
        stride = 1
        sax = SAX(wordSize=word_size, alphabetSize=alphabet_size)

        # for column_name in train1_df:
        train_string_rep, train_window_indices = sax.sliding_window(
            train1_df[col].values, cover=window_size, stride=stride)
        train_string_rep2, train_window_indices2 = sax.sliding_window(
            train2_df[col].values, cover=window_size, stride=stride)

        threshold = 1e-6

        # print(train_string_rep)
        # print(train_window_indices)

        # print(np.shape(train_string_rep2))

        model = n_gram_model(train_string_rep)
        anomalies, probabilities = n_gram_predict(model, train_string_rep2,
                                                  train_window_indices2,
                                                  threshold, window_size)
        print('window: {}, word: {}, alphabet: {}, threshold: {}'.format(
            window_size, word_size, alphabet_size, threshold))

        # print(anomalies)

        # print(np.shape(probabilities))
        # print(np.shape(train2_df.values))
        plt.clf()
        train2_df['ATT_FLAG_anom'] = np.where(train2_df['ATT_FLAG'] == 1, 100,
                                              0)
        # ax = train2_df['ATT_FLAG_anom'].plot(grid=True, color='r', label='Anomaly')
        # labels += probabilities
        train2_df['n_gram'] += probabilities

        # ax2 = train2_df['n_gram'].plot(grid=True, label='Validation')
        #
        # plt.legend()
        # plt.title('window: {} threshold: {}, col:{}'.format(window_size, threshold, col))
        # plt.savefig('images/fig_{}_{}_{}.png'.format(window_size, threshold, col))
        # plt.show()


    # plt.plot(probabilities, '.')
    # plt.show()

    # model = NgramModel(3, train_string_rep)
    # perplexity = model.perplexity(train_string_rep2)

    train2_df['n_gram'] = np.where(train2_df['n_gram'] > 0, 1, 0)

    train2_df['diff'] = train2_df['ATT_FLAG_anom'] - train2_df['n_gram']

    arr = train2_df['diff'].value_counts()
    print(arr)

    TTD = utils.TDD_metric(train2_df, probabilities)
    TP = arr[99]
    FP = arr[-1]
    TN = arr[0]
    FN = arr[100]
    S_CM = utils.S_cm(TP, FP, TN, FN)

    accuracy = (TP + TN) / len(train2_df)
    precision = TP / (TP + FP)

    print('accuracy: {}, precision: {}'.format(accuracy, precision))
    print('TDD: {}'.format(TTD))
    print('S_cm: {}'.format(S_CM))
    print('Ranked: {}'.format(0.5 * TTD + 0.5 * S_CM))
Example 18
def reduce_dimension_function(option, X_train, new_dim):

    if option == 'pca':
        n_batches = 10
        pca = PCA(n_components=new_dim)
        pca.fit(X_train)
        X_reduced = pca.transform(X_train)
        print(np.shape(X_reduced))
        return X_reduced

    elif option == 'autoencoder':
        autoe = AUTOE()
        autoe.set_data(X_train)
        autoe.shuffle_data()
        # autoe.normalize(-1.0, 1.0)
        autoe.divide_data(0.8)
        autoe.create_autoencoder(new_dim)
        # autoe.normalize() # best results of clustering for interval [0, 1]
        # autoe.standardize()
        autoe.train_autoencoder()
        # autoe.test_autoencoder()
        # autoe.get_activations()
        autoe.sort_activations()

        # autoe.plot_reconstruction(i+1)
        # autoe.save_activations('caract_autoe.csv')
        # autoe.save_activations(filename+'_'+str(i+1)+'.csv')
        # autoe.save_activations('caract_autoe.csv')
        return autoe.get_activations()

    elif option == 'svd':
        svd = SVD()
        svd.set_data(X_train)
        # svd.load_data('dataset.csv')
        svd.shuffle_data()
        # svd.normalize(-1.0,1.0)
        # svd.standardize()
        svd.run_svd(new_dim)
        svd.sort_coefficients()
        # svd.save_activations('caract_'+svd.__class__.__name__.lower()+'60.csv')
        # svd.save_activations(filename+'_'+str(i+1)+'.csv')
        return svd.get_coefficients()

    elif option == 'cp':
        cp = CP()
        cp.set_data(X_train)
        # cp.load_data('dataset.csv')
        cp.shuffle_data()
        # cp.normalize(-1.0, 1.0)
        # cp.standardize()
        cp.execute_cp(new_dim)
        cp.sort_coefficients()
        # cp.save_activations(filename+'_'+str(i+1)+'.csv')
        # cp.save_activations('caract_cp.csv')
        return cp.get_coefficients()

    elif option == 'dct':
        dcost = DCT()
        dcost.set_data(X_train)
        dcost.shuffle_data()
        # dcost.normalize(-1.0, 1.0)
        dcost.execute_dct(new_dim)
        dcost.sort_coefficients()
        # dcost.save_activations(filename+'_'+str(i+1)+'.csv')
        # dcost.save_activations('caract_dct.csv')
        return dcost.get_coefficients()

    elif option == 'dwt':
        dwt = DWT()
        dwt.set_data(X_train)
        dwt.shuffle_data()
        # dwt.normalize(-1,1)
        # dwt.standardize()
        dwt.execute_dwt(new_dim)
        dwt.sort_coefficients()
        return dwt.get_coefficients()

    elif option == 'ipla':
        paa = IPLA()
        paa.set_data(X_train)
        # paa.load_data('dataset.csv')
        paa.shuffle_data()
        # paa.normalize()
        # paa.standardize()
        paa.execute_ipla(new_dim)
        paa.sort_coefficients()
        return paa.get_coefficients()

    elif option == 'paa':
        paa = PAA()
        paa.set_data(X_train)
        # paa.load_data('dataset.csv')
        paa.shuffle_data()
        # paa.normalize(-1, 1)
        # paa.standardize()
        paa.execute_paa(new_dim)
        paa.sort_coefficients()
        return paa.get_coefficients()

    elif option == 'sax':
        sax = SAX()
        sax.set_data(X_train)
        sax.shuffle_data()
        # sax.normalize()
        # sax.standardize()
        sax.execute_sax(new_dim)
        sax.sort_coefficients()

        return sax.get_coefficients()

    else:
        return 'Invalid option'
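A short usage sketch for the dispatcher above, shown only for the 'pca' branch, which relies on scikit-learn's PCA; the other branches depend on project-local wrapper classes (AUTOE, SVD, CP, DCT, DWT, IPLA, PAA, SAX) whose APIs are not shown here. Data shapes are illustrative.

import numpy as np
from sklearn.decomposition import PCA  # needed by the 'pca' branch

# 500 series of length 64, reduced to 8 components each
X_train = np.random.rand(500, 64)
X_reduced = reduce_dimension_function('pca', X_train, new_dim=8)
print(X_reduced.shape)  # (500, 8)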
Example 19
data = raw_data.strip().split('\n')
data = np.array([float(x) for x in data])
size = data.shape[0]
r = 1.55
distance_cnt = 0


def distance(d1, d2):
    global distance_cnt
    distance_cnt += 1
    return np.linalg.norm(d1 - d2)

start = timeit.default_timer()
wordSize = 5
alphabetSize = 4
mysax = SAX(wordSize, alphabetSize)
mytrie = Trie(wordSize=wordSize, alphabetSize=alphabetSize)
symbol_list = []
for i in range(0, size - window + 1):  # 'window' (subsequence length) is defined earlier in the original script
    s, idx = mysax.to_letter_rep(data[i:window + i])
    symbol_list.append(s)
    mytrie.add(s, i)

print 'construct trie: ', timeit.default_timer()-start


def in_different_windoe(i):
    def pred(t):
        return True if abs(i - t) >= window else False
    return pred
Example 20
 def buildMotifs(self):
     # discretize the series with a sliding SAX window, then use grammar
     # induction (Sequitur-style rules) to surface repeated substrings as motifs
     s = SAX(self.wordSize, self.alphabetSize)
     self.saxterms = s.sliding_window(self.timeseries, self.windowSize)
     self.grammar = Grammar()
     self.grammar.train_string(self.saxterms)
     self.myrules = self.grammar.getRules()
Example 21
 def setUp(self):
     self.sax = SAX()
     self.delta = 1.0e-10