Example 1
def SSSTSR_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):    
    # TODO: remove these hardcoded parameters
    smooth_window_len = -1
    # zero_thresh = 0.000005  # 3b00277d-d513-43a7-a54f-942a402c507e
    zero_thresh = 0.001
    minSize = 5
    
    a = SSSTSR_Class(ts=whole_ts, smooth_window_len = smooth_window_len, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch = False, minSize=minSize)
    a.seg_enhance_represent()
    
    print "Number of segments: " + str(len(a.enhance_seg))
    
#     b = SSSTSR_Class(ts=query, smooth_window_len = somooth_window_len, zero_thresh=zero_thresh, scale_train_ts = a)
#     b.build_seg(plotSwitch = False, minSize=minSize)
#     b.seg_enhance_represent()    
    
    seg_left_idx = a.get_seg_num_from_idx(left_idx)
    seg_right_idx = a.get_seg_num_from_idx(right_idx)
    
    knn_candidates_segmentwise = segmentwise_match_invariance(a.enhance_seg[seg_left_idx:seg_right_idx+1], a.enhance_seg[:seg_left_idx], 1, 3,
                                                              offset_inv = invariance['offset'], 
                                                              longitude_inv=invariance['longitudinal'], 
                                                              amplitude_inv=invariance['amplitude'], 
                                                              lineardrift_inv=invariance['linear'])
    output = {}
    output['n'] = 3
    output['list'] = []
    
    # add query ts
    query_dict = {}
    query_dict['ts'] = sample_sensor_data(ts[left_idx:right_idx], rtv_len)
    query_dict['start_time'] = ts[left_idx][0]
    query_dict['end_time'] = ts[right_idx-1][0]   
    
    output['query'] = query_dict    
    
    segs = a.get_seg_ts()
    for i, candidate in enumerate(knn_candidates_segmentwise):
        dist = candidate[1]
        pos = candidate[0]   
                
        len_matched_word = seg_right_idx - seg_left_idx + 1       
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos+len_matched_word-1])[1]
        
        search_ts = sample_sensor_data(ts[match_str_startpos:match_str_endpos], rtv_len)              
        rtv = {}
        print dist
        rtv['dist'] = dist
        rtv['ts'] = search_ts
        rtv['start_time'] = search_ts[0][0]
        rtv['end_time'] = search_ts[-1][0]
        output['list'].append(copy.deepcopy(rtv))
        
    return output
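
For orientation, below is a minimal, hypothetical call sketch for SSSTSR_search as defined above. It assumes that ts is a sequence of (timestamp, value) pairs (that is how ts[left_idx][0] and sample_sensor_data are used in the body) and that whole_ts is the aligned plain value sequence; the indices, rtv_len and the synthetic data are placeholders, and query/sequence are not used in the active (non-commented) code path.

# Hypothetical call sketch; the timestamps and values below are synthetic placeholders.
raw_ts = [(i, float(i % 50)) for i in range(2000)]   # (timestamp, value) pairs
values = [point[1] for point in raw_ts]              # plain value sequence for the segmenter
invariance = {
    'offset': True,         # forwarded to offset_inv
    'longitudinal': False,  # forwarded to longitude_inv
    'amplitude': False,     # forwarded to amplitude_inv
    'linear': False,        # forwarded to lineardrift_inv
}
result = SSSTSR_search(whole_ts=values, left_idx=1000, right_idx=1200,
                       query=None, sequence=None, ts=raw_ts, rtv_len=200,
                       uuid=None, invariance=invariance)
for match in result['list']:
    print(match['dist'])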
Example 2
def SSSTSR_DTW_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):    
    # TODO: remove these hardcoded parameters
    smooth_window_len = -1
    # zero_thresh = 0.000005  # 3b00277d-d513-43a7-a54f-942a402c507e
    zero_thresh = 0.001
    minSize = 5
    
    a = SSSTSR_Class(ts=whole_ts, smooth_window_len = smooth_window_len, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch = False, minSize=minSize)
    a.seg_enhance_represent()
    
    print len(a.enhance_seg)
    
#     b = SSSTSR_Class(ts=query, smooth_window_len = somooth_window_len, zero_thresh=zero_thresh, scale_train_ts = a)
#     b.build_seg(plotSwitch = False, minSize=minSize)
#     b.seg_enhance_represent()    
    
    seg_left_idx = a.get_seg_num_from_idx(left_idx)
    seg_right_idx = a.get_seg_num_from_idx(right_idx)
    
    match_pos_list, matching_words, match_dist_list, search_path = DTW_seg_match(a.enhance_seg[seg_left_idx:seg_right_idx+1], a.enhance_seg[:seg_left_idx],3,
                                                              offset_inv = invariance['offset'], 
                                                              longitude_inv=invariance['longitudinal'], 
                                                              amplitude_inv=invariance['amplitude'], 
                                                              lineardrift_inv=invariance['linear'])
    output = {}
    output['n'] = 3
    output['list'] = []
    
    # add query ts
    query_dict = {}
    query_dict['ts'] = sample_sensor_data(ts[left_idx:right_idx], rtv_len)
    query_dict['start_time'] = ts[left_idx][0]
    query_dict['end_time'] = ts[right_idx-1][0]   
    
    output['query'] = query_dict    
    
    segs = a.get_seg_ts()
    for i in range(len(match_pos_list)):
        len_matched_word = len(matching_words[i])        
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos+len_matched_word-1])[1]
                  
        search_ts = sample_sensor_data(ts[match_str_startpos:match_str_endpos], rtv_len)              
        rtv = {}
        rtv['dist'] = dist
        rtv['ts'] = search_ts
        rtv['start_time'] = search_ts[0][0]
        rtv['end_time'] = search_ts[-1][0]
        output['list'].append(copy.deepcopy(rtv))    
        
    return output    
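
DTW_seg_match itself is not included in this snippet. As background, the core of any DTW-style comparison is the standard dynamic-programming recurrence over two feature sequences; the sketch below is a generic textbook version with a plain Euclidean per-element cost, not the implementation used by DTW_seg_match.

import numpy as np

def dtw_distance(seq_a, seq_b, dist=lambda x, y: np.linalg.norm(np.asarray(x) - np.asarray(y))):
    """Textbook DTW between two sequences of feature vectors."""
    n, m = len(seq_a), len(seq_b)
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = dist(seq_a[i - 1], seq_b[j - 1])
            # extend the cheapest of the three admissible warping steps
            cost[i, j] = d + min(cost[i - 1, j],      # insertion
                                 cost[i, j - 1],      # deletion
                                 cost[i - 1, j - 1])  # match
    return cost[n, m]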
Example 3
def monotonousSigTest():
    # generate a synthetic signal composed of 10 basic shapes with a total length of 3000
    test_ts, _, _ = monotone_randomsignal(10,3000,200)

    a = SSSTSR_Class(ts=test_ts)
    a.build_seg(plotSwitch = False, minSize=50)
    a.seg_encode()
    
    fig = plt.figure(figsize=(24, 4))
    plot_segts_fit(ts=a.get_ts(), seg_ts=a.get_seg_ts(), seg_fit=a.get_fit_list(), imshow=True, shapelist = a.get_symbol_list())
    
    print a.get_symbol_list()
    print ''.join(a.shapelist)
Example 4
def logtest():
    c = SSSTSR_Class(ts=np.cos(np.arange(0,20,0.1)), logger_level=logging.DEBUG, start=0, end=10)
    c.build_seg(plotSwitch = True)
    pass
Example 5
def bottomUpGAPTest():
    c = SSSTSR_Class(ts=map(float, range(10) + range(10,0,-3) + range(2,20,3) + range(15,3,-1) + (np.cos(np.arange(0,6,0.2))*5).tolist()),
                     zero_thresh=0.05)
    c.build_seg(plotSwitch = True)
    pass
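
Note that the range(10) + range(10,0,-3) + ... concatenation above only works under Python 2, where range returns a list. A Python 3-compatible way to build the same test signal is sketched below.

import numpy as np

# Python 3 equivalent of the Python 2 expression used in bottomUpGAPTest:
# range objects cannot be concatenated, so convert them to lists first.
test_ts = list(map(float,
                   list(range(10)) +
                   list(range(10, 0, -3)) +
                   list(range(2, 20, 3)) +
                   list(range(15, 3, -1)) +
                   (np.cos(np.arange(0, 6, 0.2)) * 5).tolist()))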
def idijktest():
    min_size = 5
    smooth_window_len = 5
    zero_thresh = 0.001
    
    # load idijk data
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Humidity.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_west-Rainfall.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Radiation.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Temperature')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Winddirection')
    mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Windspeed')
    
    # fetch data and time tags
    data = mat['values'][0]
    times = mat['times'][0]
    
    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=data, smooth_window_len = smooth_window_len, start=0, end=5000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch = False, minSize=min_size)
    a.seg_encode()
    a.plot()

    print "Word of the database:\t" + a.get_word()
    print "The number of symbols:\t" + str(len(a.get_word()))
    print "-----------------------------------"           
    
    piece_range = range(5200, 5400)
#     piece_range = range(1600, 1800)
#     piece_range = range(5000, 5200)

    piece = [data[i] for i in piece_range]
    piece_noise = piece + np.random.randn(len(piece))*2
    b = SSSTSR_Class(ts=piece, smooth_window_len = smooth_window_len, zero_thresh=zero_thresh)
    b.build_seg(plotSwitch = True, minSize=min_size)
    b.seg_encode()
    b.plot()
    print "The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols" 
    print "-----------------------------------"   

    # search the best n matching
    n = 3
    print "Searching the best " + str(n) + " matching time series pieces..."    

# Hamming distance matching    
    match_pos_list, match_dist_list = hamming_match_best_effort(b.get_word(), a.get_word(), n)
    for i in range(n):
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (a.get_seg_ts()[pos])[0]
        match_str_endpos = (a.get_seg_ts()[pos+len(b.get_word())-1])[1]
        print "Matching " + str(i+1) + ":\t" + a.get_word()[pos:pos+len(b.get_word())-1] + \
                "[" + str(match_str_startpos) + ":" + str(match_str_endpos) + "] dist=" + str(dist) 
  
    # plot matching results   
    fig = plt.figure()
    ax = plt.subplot(n+1,1,1)
    ax.plot(piece_range, b.get_smooth_ts(), linewidth=1, color='r')
    ax.set_title("keyword time series: " + b.get_word())
    ts = a.get_smooth_ts()
    segs = a.get_seg_ts()
    title_text = []
    for i in range(n):
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos+len(b.get_word())-1])[1]
          
        ax = plt.subplot(n+1,1,i+2)
        ax.plot(range(match_str_startpos, match_str_endpos), ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": " + a.get_word()[pos:pos+len(b.get_word())] + ", dist=" + str(dist))
              
    fig.subplots_adjust(hspace=1)
    plt.show(block=True) 
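
hamming_match_best_effort is not shown in this snippet. Conceptually, a sliding-window Hamming match of the keyword over the database word can be sketched as below; this is an illustration of the idea, not the actual implementation (which may handle ties and edge cases differently).

def hamming_match_sketch(keyword, word, n):
    """Illustrative sliding-window Hamming match: returns the n offsets of `word`
    whose window of len(keyword) symbols differs from `keyword` in the fewest positions."""
    k = len(keyword)
    scored = []
    for pos in range(len(word) - k + 1):
        window = word[pos:pos + k]
        dist = sum(1 for a, b in zip(keyword, window) if a != b)
        scored.append((pos, dist))
    scored.sort(key=lambda item: item[1])   # smallest Hamming distance first
    best = scored[:n]
    return [p for p, _ in best], [d for _, d in best]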
Example 7
# plt.plot(X_train[50])
# plt.plot(X_train[100])
# plt.plot(X_train[150])
# plt.plot(X_train[200])
# plt.plot(X_train[250])
# plt.show()


from ssstsr import SSSTSR_Class
from dist_seg_measure import *
import copy

# X_train / X_test are assumed to be defined upstream (e.g. a labelled train/test split of a time-series dataset)
trainset = []
for i in range(len(X_train)):
    bottomup_error = 0.001
    temp = SSSTSR_Class(ts=X_train[i], smooth_window_len = -1, zero_thresh=bottomup_error)
    temp.build_seg(minSize=2)
    temp.seg_enhance_represent()
    trainset.append(copy.deepcopy(temp.enhance_seg))

testset = []
for i in range(len(X_test)):
    bottomup_error = 0.001
    temp = SSSTSR_Class(ts=X_test[i], smooth_window_len = -1, zero_thresh=bottomup_error)
    temp.build_seg(minSize=2)
    temp.seg_enhance_represent()
    testset.append(copy.deepcopy(temp.enhance_seg))    

import sys
clf = []
cnt = 0
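
The classification part of this snippet is cut off (clf and cnt are only initialised). Purely to illustrate how the precomputed enhance_seg representations in trainset and testset might be consumed, a 1-nearest-neighbour loop could look like the sketch below; seg_distance, y_train and y_test are hypothetical placeholders, not names from the original code.

# Hypothetical 1-NN sketch over the precomputed segment representations.
# `seg_distance`, `y_train` and `y_test` are placeholders, not names from the original code.
def predict_1nn(test_item, trainset, y_train, seg_distance):
    best_label, best_dist = None, float('inf')
    for train_item, label in zip(trainset, y_train):
        d = seg_distance(test_item, train_item)
        if d < best_dist:
            best_dist, best_label = d, label
    return best_label

# correct = sum(predict_1nn(t, trainset, y_train, seg_distance) == y
#               for t, y in zip(testset, y_test))
# print(float(correct) / len(testset))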
Example 8
def idijktest_segmentwise():
    min_size = 30
    smooth_window_len = 5
    zero_thresh = 0.0001
    
    data = test_tno_data_prep()
#     sequence_ts = data['2011-03-20 09:00':'2012-03-10 08:00']
#     sequence = np.array(sequence_ts.tolist())
# #     query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
#     query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
#     query = np.array(query_ts.tolist())
    
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())
#     query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
#     query_ts = data['2011-10-17 14:31':'2011-10-18 02:30']
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())    
    
    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=sequence, smooth_window_len = smooth_window_len, start=0, end = 80000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch = False, minSize=min_size)
    a.seg_encode()
    a.plot()

    print "Word of the database:\t" + a.get_word()
    print "The number of symbols:\t" + str(len(a.get_word()))
    print "-----------------------------------"           
    
    b = SSSTSR_Class(ts=query, smooth_window_len = smooth_window_len, zero_thresh=zero_thresh, scale_train_ts = a)
    b.build_seg(plotSwitch = False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print "The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols" 
    print "-----------------------------------"   

    a.seg_enhance_represent()
    b.seg_enhance_represent()

    plt.show(block=False)

    # search the best n matching
    n = 3
    win_interval = 1
    print "Searching the best " + str(n) + " matching time series pieces..."    

    # plot matching results   
    fig = plt.figure()
    ax = plt.subplot(n+1,1,1)
#     ax.plot(b.get_smooth_ts(), linewidth=1, color='r')
    # iterate over each segment of the keyword and plot it
    ts = b.get_smooth_ts()
    seg_ts = b.get_seg_ts()
    for i in range(len(seg_ts)):
        seg = seg_ts[i]
        startpoint = seg[0]
        endpoint = seg[1]
        ax.plot(range(startpoint, endpoint), ts[startpoint:endpoint], linewidth=1, color='g')
    ax.set_xlim((seg_ts[0])[0], seg_ts[-1][-1])
    # plot vertical line to divide segments, just draw the line in the middle of two segments
    for seg in seg_ts[:-1]:
        # seg[1] is the exclusive(!!!) end point 
        seg_boundary = seg[1] - 0.5 
        ax.axvline(seg_boundary, linewidth=2, color='k')
    ax.set_title("keyword time series: " + b.get_word())

    ts = a.get_smooth_ts()
    segs = a.get_seg_ts()
    title_text = []

    #knn_candidates_euclidean = segmentwise_match(b.enhance_seg, a.enhance_seg, win_interval, n)
    knn_candidates_euclidean = segmentwise_match_invariance(b.enhance_seg, a.enhance_seg, win_interval, n,
                                                            offset_inv=True)  # longitude_inv=False, amplitude_inv=False, lineardrift_inv=False by default
    for i, candidate in enumerate(knn_candidates_euclidean):
        dist = candidate[1]
        pos = candidate[0]
        len_matched_word = len(b.enhance_seg)
        
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos+len_matched_word-1])[1]

        ax = plt.subplot(n+1,1,i+2)
        print match_str_startpos, match_str_endpos
        ax.plot(range(match_str_startpos, match_str_endpos), ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_xlim(match_str_startpos, match_str_endpos)
        
        seg_ts = a.get_seg_ts()
        for seg in seg_ts[pos:pos+len_matched_word]:
            # seg[1] is the exclusive(!!!) end point 
            seg_boundary = seg[1] - 0.5 
            ax.axvline(seg_boundary, linewidth=2, color='k')
        
        ax.set_title("Matching " + str(i) + ": " + a.get_word()[pos:pos+len_matched_word] + ", dist=" + str(dist))        
                
               
    fig.subplots_adjust(hspace=1)
    plt.show(block=True)    
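
segmentwise_match_invariance is called here with offset_inv=True; its implementation is not part of this snippet. As a general illustration only, offset invariance between two windows is commonly obtained by removing each window's mean (or starting value) before computing the distance:

import numpy as np

def offset_invariant_euclidean(x, y):
    """Illustration of offset-invariant comparison: subtract each window's mean so
    that a constant vertical shift between x and y does not contribute to the distance."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.linalg.norm((x - x.mean()) - (y - y.mean()))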
Example 9
def idijktest():
    min_size = 30
    smooth_window_len = 0
    zero_thresh = 0.01
    
    data = test_tno_data_prep()
#     sequence_ts = data['2011-03-20 09:00':'2012-03-10 08:00']
#     sequence = np.array(sequence_ts.tolist())
# #     query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
#     query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
#     query = np.array(query_ts.tolist())
    
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())
#     query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())    
    
    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=sequence, smooth_window_len = smooth_window_len, start=0, end=8000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch = False, minSize=min_size)
    a.seg_encode()
    a.plot()

    print "Word of the database:\t" + a.get_word()
    print "The number of symbols:\t" + str(len(a.get_word()))
    print "-----------------------------------"           
    
    b = SSSTSR_Class(ts=query, smooth_window_len = smooth_window_len, zero_thresh=0.05)
    b.build_seg(plotSwitch = False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print "The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols" 
    print "-----------------------------------"   

    # search the best n matching
    n = 3
    print "Searching the best " + str(n) + " matching time series pieces..."    

    # Levenshtein distance matching   
    match_pos_list, matching_words, match_dist_list, _ = leven_match(b.get_word(), a.get_word(), n)

    for i in range(n):
        len_matched_word = len(matching_words[i])
#         len_matched_word = len(b.get_word())
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (a.get_seg_ts()[pos])[0]
        match_str_endpos = (a.get_seg_ts()[pos+len_matched_word-1])[1]
        print "Matching " + str(i+1) + ":\t" + a.get_word()[pos:pos+len_matched_word] + \
                "[" + str(match_str_startpos) + ":" + str(match_str_endpos) + "] dist=" + str(dist) 
 
    # plot matching results   
    fig = plt.figure()
    ax = plt.subplot(n+1,1,1)
    ax.plot(b.get_smooth_ts(), linewidth=1, color='r')
    ax.set_title("keyword time series: " + b.get_word())
    ts = a.get_smooth_ts()
    segs = a.get_seg_ts()
    title_text = []
    for i in range(n):
        len_matched_word = len(matching_words[i])
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos+len_matched_word-1])[1]
         
        ax = plt.subplot(n+1,1,i+2)
        ax.plot(range(match_str_startpos, match_str_endpos), ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": " + a.get_word()[pos:pos+len_matched_word] + ", dist=" + str(dist))
             
    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
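
leven_match is not listed in this snippet. For reference, the Levenshtein (edit) distance between two symbol words follows the standard dynamic-programming recurrence; the sketch below is a generic row-by-row version, not the library's leven_match.

def levenshtein(s, t):
    """Classic edit distance: minimum number of insertions, deletions and
    substitutions needed to turn string s into string t."""
    prev = list(range(len(t) + 1))          # distances from "" to each prefix of t
    for i, cs in enumerate(s, start=1):
        curr = [i]                          # distance from s[:i] to ""
        for j, ct in enumerate(t, start=1):
            cost = 0 if cs == ct else 1
            curr.append(min(prev[j] + 1,        # deletion
                            curr[j - 1] + 1,    # insertion
                            prev[j - 1] + cost  # substitution / match
                            ))
        prev = curr
    return prev[-1]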