def SSSTSR_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):
    """Search the history before a selected window for similar segment runs.

    ``whole_ts`` is segmented with ``SSSTSR_Class``; the segments covering
    ``left_idx``..``right_idx`` form the query, and only the segments located
    before the query (``enhance_seg[:seg_left_idx]``) are searched with
    ``segmentwise_match_invariance``.

    Parameters
    ----------
    whole_ts : values handed to ``SSSTSR_Class`` for segmentation.
    left_idx, right_idx : bounds of the query window; ``right_idx`` is used
        as an exclusive end when slicing ``ts``.
    query, sequence, uuid : accepted but not read by this function.
    ts : indexable series whose items expose a time tag at position ``[0]``.
    rtv_len : forwarded to ``sample_sensor_data`` for every returned series.
    invariance : mapping with the keys ``'offset'``, ``'longitudinal'``,
        ``'amplitude'`` and ``'linear'``.

    Returns
    -------
    dict with ``'n'`` (always 3), ``'query'`` (``ts``/``start_time``/
    ``end_time`` of the query window) and ``'list'`` (one dict per candidate
    with ``dist``, ``ts``, ``start_time`` and ``end_time``).
    """
    # TODO: remove the hardcode
    smoothing_window = -1
    # 3b00277d-d513-43a7-a54f-942a402c507e
    # NOTE(review): an earlier assignment of 0.000005 is overridden by this
    # value. The original line layout was lost, so confirm that 0.001 (and not
    # 0.000005) is the threshold that is meant to be active.
    flat_thresh = 0.001
    min_seg_size = 5
    # Positional arguments of the matcher; presumably the window interval and
    # the number of neighbours to return -- verify against the matcher.
    win_interval = 1
    n_matches = 3

    model = SSSTSR_Class(ts=whole_ts, smooth_window_len=smoothing_window, zero_thresh=flat_thresh)
    model.build_seg(plotSwitch=False, minSize=min_seg_size)
    model.seg_enhance_represent()
    print("Number of segments: " + str(len(model.enhance_seg)))

    # Translate the sample indices of the window into segment numbers.
    first_seg = model.get_seg_num_from_idx(left_idx)
    last_seg = model.get_seg_num_from_idx(right_idx)

    candidates = segmentwise_match_invariance(
        model.enhance_seg[first_seg:last_seg + 1],
        model.enhance_seg[:first_seg],
        win_interval,
        n_matches,
        offset_inv=invariance['offset'],
        longitude_inv=invariance['longitudinal'],
        amplitude_inv=invariance['amplitude'],
        lineardrift_inv=invariance['linear'])

    # add query ts
    query_entry = {
        'ts': sample_sensor_data(ts[left_idx:right_idx], rtv_len),
        'start_time': ts[left_idx][0],
        'end_time': ts[right_idx - 1][0],
    }
    output = {'n': n_matches, 'list': [], 'query': query_entry}

    boundaries = model.get_seg_ts()
    # Every candidate spans as many segments as the query does.
    word_len = last_seg - first_seg + 1
    for candidate in candidates:
        pos = candidate[0]
        dist = candidate[1]
        start = boundaries[pos][0]
        stop = boundaries[pos + word_len - 1][1]
        matched_ts = sample_sensor_data(ts[start:stop], rtv_len)
        print(dist)
        entry = {
            'dist': dist,
            'ts': matched_ts,
            'start_time': matched_ts[0][0],
            'end_time': matched_ts[-1][0],
        }
        output['list'].append(copy.deepcopy(entry))
    return output
def SSSTSR_DTW_search(whole_ts, left_idx, right_idx, query, sequence, ts, rtv_len, uuid, invariance):
    """Search the history before a selected window using ``DTW_seg_match``.

    ``whole_ts`` is segmented with ``SSSTSR_Class``; the segments covering
    ``left_idx``..``right_idx`` form the query, and only the segments located
    before the query (``enhance_seg[:seg_left_idx]``) are searched. Unlike a
    fixed-length match, the length of each hit is taken from the matching
    word returned by ``DTW_seg_match``.

    Parameters
    ----------
    whole_ts : values handed to ``SSSTSR_Class`` for segmentation.
    left_idx, right_idx : bounds of the query window; ``right_idx`` is used
        as an exclusive end when slicing ``ts``.
    query, sequence, uuid : accepted but not read by this function.
    ts : indexable series whose items expose a time tag at position ``[0]``.
    rtv_len : forwarded to ``sample_sensor_data`` for every returned series.
    invariance : mapping with the keys ``'offset'``, ``'longitudinal'``,
        ``'amplitude'`` and ``'linear'``.

    Returns
    -------
    dict with ``'n'`` (always 3), ``'query'`` (``ts``/``start_time``/
    ``end_time`` of the query window) and ``'list'`` (one dict per match with
    ``dist``, ``ts``, ``start_time`` and ``end_time``).
    """
    # TODO: remove the hardcode
    smoothing_window = -1
    # 3b00277d-d513-43a7-a54f-942a402c507e
    # NOTE(review): an earlier assignment of 0.000005 is overridden by this
    # value. The original line layout was lost, so confirm that 0.001 (and not
    # 0.000005) is the threshold that is meant to be active.
    flat_thresh = 0.001
    min_seg_size = 5
    # Third positional argument of DTW_seg_match; presumably the number of
    # matches requested -- verify against the matcher.
    n_matches = 3

    model = SSSTSR_Class(ts=whole_ts, smooth_window_len=smoothing_window, zero_thresh=flat_thresh)
    model.build_seg(plotSwitch=False, minSize=min_seg_size)
    model.seg_enhance_represent()
    print(len(model.enhance_seg))

    # Translate the sample indices of the window into segment numbers.
    first_seg = model.get_seg_num_from_idx(left_idx)
    last_seg = model.get_seg_num_from_idx(right_idx)

    match_pos_list, matching_words, match_dist_list, search_path = DTW_seg_match(
        model.enhance_seg[first_seg:last_seg + 1],
        model.enhance_seg[:first_seg],
        n_matches,
        offset_inv=invariance['offset'],
        longitude_inv=invariance['longitudinal'],
        amplitude_inv=invariance['amplitude'],
        lineardrift_inv=invariance['linear'])

    # add query ts
    query_entry = {
        'ts': sample_sensor_data(ts[left_idx:right_idx], rtv_len),
        'start_time': ts[left_idx][0],
        'end_time': ts[right_idx - 1][0],
    }
    output = {'n': n_matches, 'list': [], 'query': query_entry}

    boundaries = model.get_seg_ts()
    for rank, pos in enumerate(match_pos_list):
        # The matched word may be longer or shorter than the query.
        word_len = len(matching_words[rank])
        dist = match_dist_list[rank]
        start = boundaries[pos][0]
        stop = boundaries[pos + word_len - 1][1]
        matched_ts = sample_sensor_data(ts[start:stop], rtv_len)
        entry = {
            'dist': dist,
            'ts': matched_ts,
            'start_time': matched_ts[0][0],
            'end_time': matched_ts[-1][0],
        }
        output['list'].append(copy.deepcopy(entry))
    return output
def monotonousSigTest():
    """Segment and encode a synthetic signal, plot the fit and print the symbols.

    Takes no arguments and returns nothing; the results are a matplotlib
    figure and two printed lines.
    """
    # generate a synthetic signal composed of 10 basic shapes with a length of 3000
    synthetic_ts, _, _ = monotone_randomsignal(10, 3000, 200)

    model = SSSTSR_Class(ts=synthetic_ts)
    model.build_seg(plotSwitch=False, minSize=50)
    model.seg_encode()

    # Wide, flat canvas for the segment plot.
    plt.figure(figsize=(24, 4))
    # NOTE(review): get_symbol_list is passed and printed WITHOUT being called,
    # unlike get_ts()/get_seg_ts()/get_fit_list(). If it is a method this hands
    # over a bound method rather than the symbols -- confirm the intent.
    plot_segts_fit(
        ts=model.get_ts(),
        seg_ts=model.get_seg_ts(),
        seg_fit=model.get_fit_list(),
        imshow=True,
        shapelist=model.get_symbol_list)
    print(model.get_symbol_list)
    print(''.join(model.shapelist))
def logtest():
    """Run the segmentation on a cosine wave with DEBUG logging and plotting on.

    Takes no arguments and returns nothing.
    """
    cosine_wave = np.cos(np.arange(0, 20, 0.1))
    # start/end are forwarded unchanged to SSSTSR_Class; presumably they
    # restrict processing to the first 10 samples -- confirm in SSSTSR_Class.
    model = SSSTSR_Class(ts=cosine_wave, logger_level=logging.DEBUG, start=0, end=10)
    model.build_seg(plotSwitch=True)
def bottomUpGAPTest():
    """Segment a hand-built piecewise signal with plotting switched on.

    The signal is four integer ramps (up, down, up, down) followed by a
    scaled cosine tail, all converted to floats. Takes no arguments and
    returns nothing.
    """
    # range() objects cannot be concatenated with "+" on Python 3, so each
    # ramp is turned into a list first; the resulting values are identical to
    # the Python 2 expression this replaces.
    ramps = (
        list(range(10))
        + list(range(10, 0, -3))
        + list(range(2, 20, 3))
        + list(range(15, 3, -1))
    )
    cosine_tail = (np.cos(np.arange(0, 6, 0.2)) * 5).tolist()
    # A list comprehension (instead of map) keeps this a real list on both
    # Python 2 and Python 3.
    signal = [float(value) for value in ramps + cosine_tail]

    c = SSSTSR_Class(ts=signal, zero_thresh=0.05)
    c.build_seg(plotSwitch=True)
def idijktest():
    """Hamming-distance keyword search demo on a weather-station .mat recording.

    Encodes the first 5000 samples of the recording as a symbol word, encodes
    samples 5200..5399 as the keyword, prints the best ``n`` Hamming matches
    and plots the keyword together with the matched stretches.

    Takes no arguments and returns nothing.

    NOTE(review): a later definition named ``idijktest`` exists in this file;
    at import time that later definition replaces this one.
    """
    min_size = 5
    smooth_window_len = 5
    zero_thresh = 0.001

    # load idijk data
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Humidity.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_west-Rainfall.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Radiation.mat')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Temperature')
    # mat = scipy.io.loadmat('D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Winddirection')
    # Raw string: same path value, but the backslashes are no longer parsed as
    # (invalid) escape sequences.
    mat = scipy.io.loadmat(r'D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Windspeed')

    # fetch data and time tags
    data = mat['values'][0]
    times = mat['times'][0]  # currently unused

    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=data, smooth_window_len=smooth_window_len, start=0, end=5000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    print("Word of the database:\t" + a.get_word())
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    piece_range = range(5200, 5400)
    # piece_range = range(1600, 1800)
    # piece_range = range(5000, 5200)
    piece = [data[i] for i in piece_range]
    piece_noise = piece + np.random.randn(len(piece)) * 2  # currently unused
    b = SSSTSR_Class(ts=piece, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh)
    b.build_seg(plotSwitch=True, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    # search the best n matching
    n = 3
    print("Searching the best " + str(n) + " matching time series pieces...")

    keyword = b.get_word()
    database_word = a.get_word()
    keyword_len = len(keyword)
    segs = a.get_seg_ts()

    # Hamming distance matching
    match_pos_list, match_dist_list = hamming_match_best_effort(keyword, database_word, n)
    for i in range(n):
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        # A match covers segments pos .. pos+keyword_len-1; each segment is a
        # (start, end) pair, so the end comes from the LAST covered segment.
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos + keyword_len - 1])[1]
        # Fixed: the printed word used to be sliced one symbol short.
        print("Matching " + str(i + 1) + ":\t" + database_word[pos:pos + keyword_len] +
              "[" + str(match_str_startpos) + ":" + str(match_str_endpos) + "] dist=" + str(dist))

    # plot matching results
    fig = plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    ax.plot(piece_range, b.get_smooth_ts(), linewidth=1, color='r')
    ax.set_title("keyword time series: " + keyword)

    ts = a.get_smooth_ts()
    for i in range(n):
        pos = match_pos_list[i]
        dist = match_dist_list[i]
        match_str_startpos = (segs[pos])[0]
        # Fixed: this used segs[pos + keyword_len], i.e. one segment beyond the
        # match, which plotted too much data and raised IndexError for a match
        # at the very end of the database.
        match_str_endpos = (segs[pos + keyword_len - 1])[1]
        ax = plt.subplot(n + 1, 1, i + 2)
        ax.plot(range(match_str_startpos, match_str_endpos),
                ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": " + database_word[pos:pos + keyword_len] + ", dist=" + str(dist))

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
# plt.plot(X_train[50]) # plt.plot(X_train[100]) # plt.plot(X_train[150]) # plt.plot(X_train[200]) # plt.plot(X_train[250]) # plt.show() from ssstsr import SSSTSR_Class from dist_seg_measure import * import copy trainset = [] for i in range(len(X_train)): bottomup_error = 0.001 temp = SSSTSR_Class(ts=X_train[i], smooth_window_len = -1, zero_thresh=bottomup_error) temp.build_seg(minSize=2) temp.seg_enhance_represent() trainset.append(copy.deepcopy(temp.enhance_seg)) testset = [] for i in range(len(X_test)): bottomup_error = 0.001 temp = SSSTSR_Class(ts=X_test[i], smooth_window_len = -1, zero_thresh=bottomup_error) temp.build_seg(minSize=2) temp.seg_enhance_represent() testset.append(copy.deepcopy(temp.enhance_seg)) import sys clf = [] cnt = 0
def idijktest_segmentwise():
    """Segment-wise nearest-neighbour search demo on the TNO data.

    Builds the segment representation of a database series (everything from
    2011-10-20 09:00 on) and of a query window (2011-10-17 14:31 to
    2011-10-20 02:30), searches the database with
    ``segmentwise_match_invariance`` (offset invariance enabled) and plots the
    query together with the best ``n`` candidates, marking segment boundaries
    with vertical lines.

    Takes no arguments and returns nothing.
    """
    min_size = 30
    smooth_window_len = 5
    zero_thresh = 0.0001
    data = test_tno_data_prep()

    # sequence_ts = data['2011-03-20 09:00':'2012-03-10 08:00']
    # sequence = np.array(sequence_ts.tolist())
    #
    # query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
    # query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    # query = np.array(query_ts.tolist())
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())

    # query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
    # query_ts = data['2011-10-17 14:31':'2011-10-18 02:30']
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())

    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=sequence, smooth_window_len=smooth_window_len, start=0, end=80000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    print("Word of the database:\t" + a.get_word())
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    # The query is built with the database model passed as scale_train_ts.
    b = SSSTSR_Class(ts=query, smooth_window_len=smooth_window_len, zero_thresh=zero_thresh, scale_train_ts=a)
    b.build_seg(plotSwitch=False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    a.seg_enhance_represent()
    b.seg_enhance_represent()
    plt.show(block=False)

    # search the best n matching
    n = 3
    win_interval = 1
    print("Searching the best " + str(n) + " matching time series pieces...")

    # plot matching results
    fig = plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    # ax.plot(b.get_smooth_ts(), linewidth=1, color='r')
    # iterate each segment and plot
    query_smooth = b.get_smooth_ts()
    query_segs = b.get_seg_ts()
    for seg in query_segs:
        startpoint = seg[0]
        endpoint = seg[1]
        ax.plot(range(startpoint, endpoint), query_smooth[startpoint:endpoint], linewidth=1, color='g')
    ax.set_xlim((query_segs[0])[0], query_segs[-1][-1])
    # plot vertical line to divide segments, just draw the line in the middle of two segments
    for seg in query_segs[:-1]:
        # seg[1] is the exclusive(!!!) end point
        seg_boundary = seg[1] - 0.5
        ax.axvline(seg_boundary, linewidth=2, color='k')
    ax.set_title("keyword time series: " + b.get_word())

    ts = a.get_smooth_ts()
    # Fetched once: the database segment list does not change inside the loop.
    segs = a.get_seg_ts()
    database_word = a.get_word()
    # Every candidate spans as many segments as the query has.
    len_matched_word = len(b.enhance_seg)

    # knn_candidates_euclidean = segmentwise_match(b.enhance_seg, a.enhance_seg, win_interval, n)
    knn_candidates_euclidean = segmentwise_match_invariance(
        b.enhance_seg, a.enhance_seg, win_interval, n,
        offset_inv=True)  # longitude_inv=False, amplitude_inv=False, lineardrift_inv=False
    for i, candidate in enumerate(knn_candidates_euclidean):
        pos = candidate[0]
        dist = candidate[1]
        match_str_startpos = (segs[pos])[0]
        match_str_endpos = (segs[pos + len_matched_word - 1])[1]
        # Fixed: the axis used to be created twice, first on a
        # (len(candidates)+1)-row grid and then on the (n+1)-row grid. Only the
        # (n+1)-row grid is used now so it always agrees with the query axis.
        ax = plt.subplot(n + 1, 1, i + 2)
        print("%s %s" % (match_str_startpos, match_str_endpos))
        ax.plot(range(match_str_startpos, match_str_endpos),
                ts[match_str_startpos:match_str_endpos], linewidth=1, color='b')
        ax.set_xlim(match_str_startpos, match_str_endpos)
        for seg in segs[pos:pos + len_matched_word]:
            # seg[1] is the exclusive(!!!) end point
            seg_boundary = seg[1] - 0.5
            ax.axvline(seg_boundary, linewidth=2, color='k')
        ax.set_title("Matching " + str(i) + ": " + database_word[pos:pos + len_matched_word] + ", dist=" + str(dist))

    # Fixed: the layout adjustment and the blocking show were issued twice in
    # a row; once is enough.
    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
def idijktest():
    """Levenshtein keyword search demo on the TNO data.

    Encodes a database series (everything from 2011-10-20 09:00 on, limited
    through ``end=8000``) and a query window (2011-10-17 14:31 to
    2011-10-20 02:30) as symbol words, prints the best ``n`` matches found by
    ``leven_match`` and plots the query together with the matched stretches.

    Takes no arguments and returns nothing.
    """
    min_size = 30
    smooth_window_len = 0
    zero_thresh = 0.01
    data = test_tno_data_prep()

    # sequence_ts = data['2011-03-20 09:00':'2012-03-10 08:00']
    # sequence = np.array(sequence_ts.tolist())
    #
    # query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
    # query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    # query = np.array(query_ts.tolist())
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())

    # query_ts = data['2012-03-20 09:00':'2012-03-20 21:00']
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())

    # adjust smooth_window_len to get different level of smoothing
    a = SSSTSR_Class(ts=sequence, smooth_window_len=smooth_window_len, start=0, end=8000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    print("Word of the database:\t" + a.get_word())
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    # The query uses its own threshold (0.05), not the database's zero_thresh.
    b = SSSTSR_Class(ts=query, smooth_window_len=smooth_window_len, zero_thresh=0.05)
    b.build_seg(plotSwitch=False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    # search the best n matching
    n = 3
    print("Searching the best " + str(n) + " matching time series pieces...")

    # Levenshtein distance matching
    match_pos_list, matching_words, match_dist_list, _ = leven_match(b.get_word(), a.get_word(), n)
    for rank in range(n):
        # Matches can differ in length from the keyword, so the length is
        # taken from the matched word itself.
        word_len = len(matching_words[rank])
        # word_len = len(b.get_word())
        pos = match_pos_list[rank]
        dist = match_dist_list[rank]
        first_sample = a.get_seg_ts()[pos][0]
        end_sample = a.get_seg_ts()[pos + word_len - 1][1]
        matched_symbols = a.get_word()[pos:pos + word_len]
        print("Matching " + str(rank + 1) + ":\t" + matched_symbols +
              "[" + str(first_sample) + ":" + str(end_sample) + "] dist=" + str(dist))

    # plot matching results
    fig = plt.figure()
    query_axis = plt.subplot(n + 1, 1, 1)
    query_axis.plot(b.get_smooth_ts(), linewidth=1, color='r')
    query_axis.set_title("keyword time series: " + b.get_word())

    ts = a.get_smooth_ts()
    segs = a.get_seg_ts()
    title_text = []
    for rank in range(n):
        word_len = len(matching_words[rank])
        pos = match_pos_list[rank]
        dist = match_dist_list[rank]
        first_sample = segs[pos][0]
        end_sample = segs[pos + word_len - 1][1]
        match_axis = plt.subplot(n + 1, 1, rank + 2)
        match_axis.plot(range(first_sample, end_sample), ts[first_sample:end_sample], linewidth=1, color='b')
        match_axis.set_title("Matching " + str(rank) + ": " + a.get_word()[pos:pos + word_len] + ", dist=" + str(dist))

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)