def __init__(self, ts=None, start=None, end=None, max_err=None, logger_level=None,
             smooth_window_len=None, zero_thresh=0.05, scale_train_ts=None):
    """Initialise the instance from a window of a time series.

    The window ``ts[start:end]`` is copied into a numpy array, smoothed with
    ``publib.smooth`` and scaled with ``publib.scale``.  The containers for
    segments, segment fits, symbols and the resulting word start out empty.

    Parameters:
        ts: input sequence; the class-level default series is used when None.
        start: first index of the window (0 when None).
        end: exclusive end index of the window (``len(ts)`` when None).
        max_err: maximum error stored for the bottom-up step; the class-level
            default is used when None.
        logger_level: level set on the constructor's logger
            (``logging.INFO`` when None).
        smooth_window_len: ``window_len`` handed to ``publib.smooth``
            (0 when None).
        zero_thresh: threshold stored unchanged on the instance.
        scale_train_ts: optional object exposing ``minV`` and ``maxV``; when
            given, both values are forwarded to ``publib.scale``.
            Presumably another instance of this class -- confirm with callers.

    Raises:
        ValueError: if ``ts`` is empty, or if the selected window holds fewer
            than two samples.
    """
    # NOTE(review): the wording says "larger than 2" while the checks below
    # accept exactly 2 samples -- confirm which one is intended.
    too_short_msg = "Invalid input: minimal length of time series should be larger than 2!"

    # configure logging
    if logger_level is None:
        logger_level = logging.INFO
    logger = logging.getLogger("SSSTSR_Class.__init__")
    logger.setLevel(logger_level)
    logger.addHandler(publib.console_handle)

    # fill in defaults for the remaining parameters
    if ts is None:
        ts = SSSTSR_Class.__default_ts
    if len(ts) == 0:
        raise ValueError("Invalid input: time series should not be empty!")
    if start is None:
        start = 0
    if end is None:
        # pay attention: a[0:end] means a[0] .. a[end-1]
        end = len(ts)
    if end - start < 2:
        raise ValueError(too_short_msg)
    if max_err is None:
        max_err = SSSTSR_Class.__default_max_err
    if smooth_window_len is None:
        smooth_window_len = 0

    window = np.array(ts[start:end])
    # The arithmetic check above only looks at the requested bounds.  Slicing
    # clamps out-of-range bounds and resolves negative indices, so the window
    # actually obtained can still be too short; verify it explicitly.
    if len(window) < 2:
        raise ValueError(too_short_msg)

    # initialize instance variables
    self.__loggerLevel = logger_level
    self.__ts = window                  # time series (selected window)
    self.__lenTS = len(self.__ts)       # length of time series
    self.__segTS = []                   # segments
    self.__seg_fit_list = []            # the fitting result of segments

    # per the original note, publib.smooth uses a Hamming window by default
    self.__smooth_ts = publib.smooth(self.__ts, window_len=smooth_window_len)
    if scale_train_ts is None:
        self.__normTS = publib.scale(self.__smooth_ts)
    else:
        # scale with the value range taken from the supplied reference object
        self.__normTS = publib.scale(self.__smooth_ts,
                                     scale_train_ts.minV,
                                     scale_train_ts.maxV)

    # value range of the raw (unsmoothed) window
    self.minV = np.min(self.__ts)
    self.maxV = np.max(self.__ts)

    self.__maxErr = max_err             # max err used in bottom up
    self.__zero_thresh = zero_thresh
    self.__symbollist = []
    self.__word = ""
    self.enhance_seg = []
def idijktest_euclidean(mat_path='/home/zhe/Dropbox/TH/DDSC/Matlab/data/weatherstation_east-Windspeed'):
    """Plot the best Euclidean sliding-window matches for a sample query.

    Loads the ``values`` row of a MATLAB file, smooths it, takes samples
    0..4999 as the search sequence and samples 5200..5499 as the query, then
    asks ``bruteforce_sliding_windows`` for the 3 best matches and plots the
    query followed by one subplot per match.

    Parameters:
        mat_path: path of the ``.mat`` file to load.  The default is the
            original, machine-specific location; pass another path to run
            the demo elsewhere.
    """
    smooth_window_len = 5
    n = 3               # number of matches requested / plotted
    win_interval = 10   # forwarded to bruteforce_sliding_windows

    # load idijk data and smooth the value series
    mat = scipy.io.loadmat(mat_path)
    data = publib.smooth(mat['values'][0], window_len=smooth_window_len)

    sequence = np.array(data[0:5000].tolist())
    query = np.array(data[5200:5500].tolist())

    knn_candidates_euclidean = bruteforce_sliding_windows(
        query, sequence, win_interval, n, euclidean_dist)

    # plot matching results: query on top, one row per candidate below
    fig = plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    ax.plot(query, linewidth=1, color='r')
    ax.set_xlim(0, len(query))
    ax.set_title("query time series")

    for i, candidate in enumerate(knn_candidates_euclidean):
        # each candidate is read as (start position, distance)
        match_start = candidate[0]
        dist = candidate[1]
        match_end = match_start + len(query)

        ax = plt.subplot(n + 1, 1, i + 2)
        ax.plot(range(match_start, match_end),
                sequence[match_start:match_end],
                linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
        ax.set_xlim(match_start, match_end)

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
def test_tno_data_matching(data, smooth_win=-1):
    """Find and plot the 3 best Euclidean matches of a query inside ``data``.

    Two fixed date ranges are cut out of ``data``: one as the search
    sequence, one as the query.  Both are optionally smoothed, matched with
    ``bruteforce_sliding_windows`` using ``euclidean_dist``, and plotted
    (query first, then one subplot per match).  Diagnostic information and
    the elapsed matching time are printed.

    Parameters:
        data: series sliced by date strings and exposing ``index``,
            ``head`` and ``tolist``; presumably a pandas Series with a
            datetime index -- confirm with callers.
        smooth_win: window length handed to ``publib.smooth``; smoothing is
            skipped unless it is greater than 0.
    """
    # print(...) with a single argument prints the same text under Python 2
    # and also parses under Python 3.
    print(type(data.index))

    # alternative range used before: '2011-03-20 09:00':'2012-03-10 08:00'
    sequence_ts = data['2011-10-20 09:00':'2011-12-20 09:00']
    sequence = np.array(sequence_ts.tolist())

    # alternative range used before: '2012-03-20 09:00':'2012-03-20 21:00'
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    print(query_ts.head(20))
    print(type(query_ts.index))
    print(len(query_ts))
    query = np.array(query_ts.tolist())
    print(len(query))

    # NOTE: z-normalisation of sequence and query was tried here and is
    # currently disabled.

    if smooth_win > 0:
        # keep the Series views in sync with the smoothed arrays
        query = publib.smooth(query, smooth_win)
        query_ts = pd.Series(query, index=query_ts.index)
        sequence = publib.smooth(sequence, smooth_win)
        sequence_ts = pd.Series(sequence, index=sequence_ts.index)

    k = 3
    win_interval = 60

    # NOTE: DTW-based variants (dtw_dist_constrain, LB_DTW_sliding_windows)
    # were tried in place of the Euclidean search and are currently disabled.
    start_t = time.time()
    knn_candidates_euclidean = bruteforce_sliding_windows(
        query, sequence, win_interval, k, euclidean_dist)
    time_euclidean = time.time() - start_t
    print("Euclidean takes: " + str(time_euclidean))

    xfmt = md.DateFormatter('%H')
    n_rows = len(knn_candidates_euclidean) + 1

    plt.figure(1)
    ax = plt.subplot(n_rows, 1, 1)
    plt.title("query time series: from %s to %s"
              % (str(query_ts.index[0]), str(query_ts.index[-1])),
              fontsize=10)
    ax.xaxis.set_major_formatter(xfmt)
    query_ts.plot(color='g')

    for i, candidate in enumerate(knn_candidates_euclidean):
        ax = plt.subplot(n_rows, 1, i + 2)
        # each candidate is read as (start position, distance)
        pos = candidate[0]
        dist = candidate[1]
        stop = pos + len(query)
        # matches are drawn against integer positions rather than timestamps
        ax.plot(range(pos, stop), sequence[pos:stop], linewidth=1, color='b')
        ax.set_xlim(pos, stop)
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
    plt.show()

    print("length of query ts: " + str(len(query)))
    print("length of sequence: " + str(len(sequence)))