# Example #1
def SSSTSR_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):
    """Search the part of a series before a selected range for similar stretches.

    ``whole_ts`` is segmented with ``SSSTSR_Class``. The segments covering
    ``left_idx``..``right_idx`` form the query; they are matched segment-wise
    against the segments that precede the query.

    Args:
        whole_ts: series handed to ``SSSTSR_Class`` for segmentation.
        left_idx: sample index where the selected range starts.
        right_idx: sample index where the selected range ends (used as an
            exclusive bound when slicing ``ts``).
        query: not used by this function; kept for interface compatibility.
        sequence: not used by this function; kept for interface compatibility.
        ts: indexable series of records; element ``[0]`` of a record is
            reported as its time.
        rtv_len: forwarded to ``sample_sensor_data`` for every returned series.
        uuid: not used by this function; kept for interface compatibility.
        invariance: mapping; only ``invariance['offset']`` is read here.

    Returns:
        dict with
            'n'     -- number of matches requested (3),
            'query' -- dict with 'ts', 'start_time', 'end_time' of the query,
            'list'  -- one dict per match with 'dist', 'ts', 'start_time'
                       and 'end_time'.
    """
    # TODO: remove the hardcode
    smooth_window_len = -1
    zero_thresh = 0.000005  # 3b00277d-d513-43a7-a54f-942a402c507e
    min_size = 5
    win_interval = 1
    n_matches = 3

    a = SSSTSR_Class(ts=whole_ts, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    print("Number of segments: " + str(len(a.enhance_seg)))

    seg_left_idx = a.get_seg_num_from_idx(left_idx)
    seg_right_idx = a.get_seg_num_from_idx(right_idx)
    # Number of segments in the query; every match spans the same count.
    n_query_segs = seg_right_idx - seg_left_idx + 1

    # NOTE(review): the original call was cut off after ``offset_inv``; any
    # further invariance options are left at the matcher's defaults here.
    # Confirm whether more entries of ``invariance`` should be forwarded.
    knn_candidates_segmentwise = segmentwise_match_invariance(
        a.enhance_seg[seg_left_idx:seg_right_idx + 1],
        a.enhance_seg[:seg_left_idx],
        win_interval,
        n_matches,
        offset_inv=invariance['offset'])

    output = {}
    output['n'] = n_matches
    output['list'] = []
    # add query ts
    output['query'] = {
        'ts': sample_sensor_data(ts[left_idx:right_idx], rtv_len),
        'start_time': ts[left_idx][0],
        'end_time': ts[right_idx - 1][0],
    }

    segs = a.get_seg_ts()
    for candidate in knn_candidates_segmentwise:
        pos = candidate[0]
        dist = candidate[1]
        # A match starts at the first sample of segment ``pos`` and ends at
        # the end point of the last matched segment.
        match_str_startpos = segs[pos][0]
        match_str_endpos = segs[pos + n_query_segs - 1][1]
        search_ts = sample_sensor_data(ts[match_str_startpos:match_str_endpos], rtv_len)
        print(dist)
        # Store the result; previously the dict was built and then dropped,
        # so the returned list was always empty.
        output['list'].append({
            'dist': dist,
            'ts': search_ts,
            'start_time': search_ts[0][0],
            'end_time': search_ts[-1][0],
        })
    return output
# Example #2
def SSSTSR_DTW_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):
    """Search the part of a series before a selected range using ``DTW_seg_match``.

    ``whole_ts`` is segmented with ``SSSTSR_Class``. The segments covering
    ``left_idx``..``right_idx`` form the query; ``DTW_seg_match`` compares
    them with the segments that precede the query.

    Args:
        whole_ts: series handed to ``SSSTSR_Class`` for segmentation.
        left_idx: sample index where the selected range starts.
        right_idx: sample index where the selected range ends (used as an
            exclusive bound when slicing ``ts``).
        query: not used by this function; kept for interface compatibility.
        sequence: not used by this function; kept for interface compatibility.
        ts: indexable series of records; element ``[0]`` of a record is
            reported as its time.
        rtv_len: forwarded to ``sample_sensor_data`` for every returned series.
        uuid: not used by this function; kept for interface compatibility.
        invariance: mapping; only ``invariance['offset']`` is read here.

    Returns:
        dict with
            'n'     -- 3,
            'query' -- dict with 'ts', 'start_time', 'end_time' of the query,
            'list'  -- one dict per match with 'dist', 'ts', 'start_time'
                       and 'end_time'.
    """
    # TODO: remove the hardcode
    smooth_window_len = -1
    zero_thresh = 0.000005  # 3b00277d-d513-43a7-a54f-942a402c507e
    min_size = 5
    # Presumably the number of matches requested; the same value is reported
    # as output['n'].
    n_matches = 3

    a = SSSTSR_Class(ts=whole_ts, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    print(len(a.enhance_seg))

    seg_left_idx = a.get_seg_num_from_idx(left_idx)
    seg_right_idx = a.get_seg_num_from_idx(right_idx)

    # NOTE(review): the original call was cut off after ``offset_inv``; any
    # further invariance options are left at the matcher's defaults here.
    # Confirm whether more entries of ``invariance`` should be forwarded.
    match_pos_list, matching_words, match_dist_list, _search_path = DTW_seg_match(
        a.enhance_seg[seg_left_idx:seg_right_idx + 1],
        a.enhance_seg[:seg_left_idx],
        n_matches,
        offset_inv=invariance['offset'])

    output = {}
    output['n'] = n_matches
    output['list'] = []
    # add query ts
    output['query'] = {
        'ts': sample_sensor_data(ts[left_idx:right_idx], rtv_len),
        'start_time': ts[left_idx][0],
        'end_time': ts[right_idx - 1][0],
    }

    segs = a.get_seg_ts()
    for pos, matched_word, dist in zip(match_pos_list, matching_words, match_dist_list):
        # Matches may differ in length from the query, so the span is taken
        # from the matched word itself.
        len_matched_word = len(matched_word)
        match_str_startpos = segs[pos][0]
        match_str_endpos = segs[pos + len_matched_word - 1][1]
        search_ts = sample_sensor_data(ts[match_str_startpos:match_str_endpos], rtv_len)
        # Store the result; previously the dict was built and then dropped,
        # so the returned list was always empty.
        output['list'].append({
            'dist': dist,
            'ts': search_ts,
            'start_time': search_ts[0][0],
            'end_time': search_ts[-1][0],
        })
    return output
# Example #3
def monotonousSigTest():
    """Segment a synthetic signal, plot the fitted segments and print its symbols."""
    # Synthetic signal composed of 10 basic shapes with a length of 3000.
    synthetic_ts, _, _ = monotone_randomsignal(10, 3000, 200)

    representation = SSSTSR_Class(ts=synthetic_ts)
    representation.build_seg(plotSwitch=False, minSize=50)

    plt.figure(figsize=(24, 4))
    # NOTE(review): ``get_symbol_list`` is referenced without being called,
    # unlike the other ``get_*`` accessors used here -- confirm that it is an
    # attribute/property and not a method missing its parentheses.
    plot_segts_fit(
        ts=representation.get_ts(),
        seg_ts=representation.get_seg_ts(),
        seg_fit=representation.get_fit_list(),
        imshow=True,
        shapelist=representation.get_symbol_list,
    )
    print(representation.get_symbol_list)
    print(''.join(representation.shapelist))
def idijktest():
    """Demo: symbolise wind-speed data and search it for a piece by Hamming distance.

    Loads the ``weatherstation_east-Windspeed`` .mat file, builds the symbolic
    word of the database (``start=0, end=5000``) and of a query piece taken
    from samples 5200..5399, asks ``hamming_match_best_effort`` for the best
    ``n`` matches, prints them and plots the query above the matched
    stretches.

    Takes no arguments and returns nothing; output goes to stdout and to a
    matplotlib figure.
    """
    min_size = 5
    # adjust smooth_window_len to get different level of smoothing
    smooth_window_len = 5
    zero_thresh = 0.001

    # load idijk data
    # Raw string so the backslashes of the Windows path are never read as
    # escape sequences. Other files of the same folder used before:
    #   weatherstation_east-Humidity.mat, weatherstation_west-Rainfall.mat,
    #   weatherstation_east-Radiation.mat, weatherstation_east-Temperature,
    #   weatherstation_east-Winddirection
    mat = scipy.io.loadmat(r'D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Windspeed')
    data = mat['values'][0]

    a = SSSTSR_Class(ts=data, smooth_window_len=smooth_window_len, start=0, end=5000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    db_word = a.get_word()

    print("Word of the database:\t" + db_word)
    print("The number of symbols:\t" + str(len(db_word)))
    print("-----------------------------------")

    # Other ranges tried before: range(1600, 1800), range(5000, 5200)
    piece_range = range(5200, 5400)
    piece = [data[i] for i in piece_range]
    b = SSSTSR_Class(ts=piece, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh)
    b.build_seg(plotSwitch=True, minSize=min_size)
    key_word = b.get_word()
    key_len = len(key_word)
    print("The keyword is \"" + key_word + "\", contains " + str(key_len) + " symbols")
    print("-----------------------------------")

    # search the best n matching
    n = 3
    print("Searching the best " + str(n) + " matching time series pieces...")

    # Hamming distance matching
    match_pos_list, match_dist_list = hamming_match_best_effort(key_word, db_word, n)

    # Resolve every match once so the printout and the plots use the same
    # span. A match of ``key_len`` symbols starting at ``pos`` covers
    # segments pos .. pos + key_len - 1. Only the matches actually returned
    # are used, at most ``n``.
    segs = a.get_seg_ts()
    matches = []
    for pos, dist in list(zip(match_pos_list, match_dist_list))[:n]:
        match_str_startpos = segs[pos][0]
        match_str_endpos = segs[pos + key_len - 1][1]
        matches.append((pos, dist, match_str_startpos, match_str_endpos))

    for i, (pos, dist, match_str_startpos, match_str_endpos) in enumerate(matches):
        print("Matching " + str(i + 1) + ":\t" + db_word[pos:pos + key_len] +
              "[" + str(match_str_startpos) + ":" + str(match_str_endpos) + "] dist=" + str(dist))

    # plot matching results
    plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    ax.plot(piece_range, b.get_smooth_ts(), linewidth=1, color='r')
    ax.set_title("keyword time series: " + key_word)
    ts = a.get_smooth_ts()
    for i, (pos, dist, match_str_startpos, match_str_endpos) in enumerate(matches):
        ax = plt.subplot(n + 1, 1, i + 2)
        ax.plot(range(match_str_startpos, match_str_endpos),
                ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": " + db_word[pos:pos + key_len] + ", dist=" + str(dist))
# Example #5
def idijktest_segmentwise():
    """Demo: segment-wise search of a query stretch inside a longer series.

    Takes the data returned by ``test_tno_data_prep()``, uses everything from
    '2011-10-20 09:00' onward as the database and
    '2011-10-17 14:31'..'2011-10-20 02:30' as the query, builds an
    ``SSSTSR_Class`` representation of both, runs
    ``segmentwise_match_invariance`` with ``offset_inv=True`` and plots the
    query above the matched stretches.

    Takes no arguments and returns nothing; output goes to stdout and to a
    matplotlib figure.
    """
    min_size = 30
    # adjust smooth_window_len to get different level of smoothing
    smooth_window_len = 5
    zero_thresh = 0.0001
    data = test_tno_data_prep()
    # Other ranges tried before:
    #   sequence '2011-03-20 09:00':'2012-03-10 08:00'
    #   query    '2012-03-20 09:00':'2012-03-20 21:00'
    #   query    '2011-10-17 14:31':'2011-10-18 02:30'
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())

    a = SSSTSR_Class(ts=sequence, smooth_window_len=smooth_window_len, start=0, end=80000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    db_word = a.get_word()

    print("Word of the database:\t" + db_word)
    print("The number of symbols:\t" + str(len(db_word)))
    print("-----------------------------------")
    b = SSSTSR_Class(ts=query, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh, scale_train_ts=a)
    b.build_seg(plotSwitch=False, minSize=min_size)
    key_word = b.get_word()
    print("The keyword is \"" + key_word + "\", contains " + str(len(key_word)) + " symbols")
    print("-----------------------------------")

    # search the best n matching
    n = 3
    win_interval = 1
    print("Searching the best " + str(n) + " matching time series pieces...")

    # plot matching results
    plt.figure()
    ax = plt.subplot(n + 1, 1, 1)

    # NOTE(review): this subplot used ``ts`` and ``seg_ts`` before they were
    # assigned, which raises UnboundLocalError. The subplot is titled as the
    # keyword series, so the query's smoothed series and segments are assumed
    # to be what was meant -- confirm.
    query_smooth = b.get_smooth_ts()
    query_segs = b.get_seg_ts()
    # iterate each segment and plot
    for seg in query_segs:
        startpoint = seg[0]
        endpoint = seg[1]
        ax.plot(range(startpoint, endpoint), query_smooth[startpoint:endpoint], linewidth=1, color='g')
    ax.set_xlim(query_segs[0][0], query_segs[-1][-1])
    # plot vertical line to divide segments, just draw the line in the middle of two segments
    for seg in query_segs[:-1]:
        # seg[1] is the exclusive(!!!) end point
        ax.axvline(seg[1] - 0.5, linewidth=2, color='k')
    ax.set_title("keyword time series: " + key_word)

    ts = a.get_smooth_ts()
    segs = a.get_seg_ts()
    # Every candidate spans as many segments as the query has.
    len_matched_word = len(b.enhance_seg)

    knn_candidates_euclidean = segmentwise_match_invariance(
        b.enhance_seg, a.enhance_seg, win_interval, n, offset_inv=True)
    for i, candidate in enumerate(knn_candidates_euclidean):
        pos = candidate[0]
        dist = candidate[1]
        match_str_startpos = segs[pos][0]
        match_str_endpos = segs[pos + len_matched_word - 1][1]

        # Same (n + 1)-row grid as the keyword subplot above.
        ax = plt.subplot(n + 1, 1, i + 2)
        print("%s %s" % (match_str_startpos, match_str_endpos))
        ax.plot(range(match_str_startpos, match_str_endpos),
                ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_xlim(match_str_startpos, match_str_endpos)
        for seg in segs[pos:pos + len_matched_word]:
            # seg[1] is the exclusive(!!!) end point
            ax.axvline(seg[1] - 0.5, linewidth=2, color='k')
        ax.set_title("Matching " + str(i) + ": " + db_word[pos:pos + len_matched_word] + ", dist=" + str(dist))
# Example #6
def idijktest():
    """Demo: symbolic search of a query stretch using ``leven_match``.

    Uses the data from ``test_tno_data_prep()``: everything from
    '2011-10-20 09:00' onward is the database (``start=0, end=8000``) and
    '2011-10-17 14:31'..'2011-10-20 02:30' is the query. Both are turned
    into words with ``SSSTSR_Class``; the best ``n`` matches reported by
    ``leven_match`` are printed and plotted below the query.

    Takes no arguments and returns nothing.
    """
    seg_min_size = 30
    # adjust the smoothing window to get different level of smoothing
    window = 0
    db_zero_thresh = 0.01
    key_zero_thresh = 0.05

    data = test_tno_data_prep()
    # Other ranges tried before:
    #   sequence '2011-03-20 09:00':'2012-03-10 08:00'
    #   query    '2012-03-20 09:00':'2012-03-20 21:00'
    sequence = np.array(data['2011-10-20 09:00':].tolist())
    query = np.array(data['2011-10-17 14:31':'2011-10-20 02:30'].tolist())

    a = SSSTSR_Class(ts=sequence, smooth_window_len=window, start=0, end=8000, zero_thresh=db_zero_thresh)
    a.build_seg(plotSwitch=False, minSize=seg_min_size)
    db_word = a.get_word()
    print("Word of the database:\t" + db_word)
    print("The number of symbols:\t" + str(len(db_word)))
    print("-----------------------------------")

    b = SSSTSR_Class(ts=query, smooth_window_len=window, zero_thresh=key_zero_thresh)
    b.build_seg(plotSwitch=False, minSize=seg_min_size)
    key_word = b.get_word()
    print("The keyword is \"" + key_word + "\", contains " + str(len(key_word)) + " symbols")
    print("-----------------------------------")

    # search the best n matching
    n = 3
    print("Searching the best " + str(n) + " matching time series pieces...")

    # Levenshtein distance matching
    match_pos_list, matching_words, match_dist_list, _ = leven_match(key_word, db_word, n)
    segs = a.get_seg_ts()

    def match_span(idx):
        # Position, symbol count and sample span of the idx-th match; the
        # symbol count comes from the matched word, not from the keyword.
        n_symbols = len(matching_words[idx])
        start_seg = match_pos_list[idx]
        first_sample = segs[start_seg][0]
        last_sample = segs[start_seg + n_symbols - 1][1]
        return start_seg, n_symbols, first_sample, last_sample

    for k in range(n):
        start_seg, n_symbols, first_sample, last_sample = match_span(k)
        print("Matching " + str(k + 1) + ":\t" + db_word[start_seg:start_seg + n_symbols] +
              "[" + str(first_sample) + ":" + str(last_sample) + "] dist=" + str(match_dist_list[k]))

    # plot matching results
    plt.figure()
    axes = plt.subplot(n + 1, 1, 1)
    axes.plot(b.get_smooth_ts(), linewidth=1, color='r')
    axes.set_title("keyword time series: " + key_word)

    smoothed = a.get_smooth_ts()
    for k in range(n):
        start_seg, n_symbols, first_sample, last_sample = match_span(k)
        axes = plt.subplot(n + 1, 1, k + 2)
        axes.plot(range(first_sample, last_sample), smoothed[first_sample:last_sample], linewidth=1, color='b')
        axes.set_title("Matching " + str(k) + ": " + db_word[start_seg:start_seg + n_symbols] +
                       ", dist=" + str(match_dist_list[k]))