예제 #1
0
    def __init__(self, ts=None, start=None, end=None, max_err=None,
                 logger_level=None, smooth_window_len=None, zero_thresh=0.05,
                 scale_train_ts=None):
        """Prepare the working state for one slice of a time series.

        The slice ``ts[start:end]`` is copied into an array, smoothed with
        ``publib.smooth`` and scaled with ``publib.scale``; the containers
        for segments, fitting results, symbols and the word start out empty.

        Parameters
        ----------
        ts : sequence, optional
            Input time series. The class-level default series is used when
            omitted.
        start, end : int, optional
            Half-open bounds of the slice to process; default to ``0`` and
            ``len(ts)`` respectively.
        max_err : optional
            Maximum error kept for the bottom-up step. The class-level
            default is used when omitted.
        logger_level : int, optional
            Level applied to the ``"SSSTSR_Class.__init__"`` logger;
            ``logging.INFO`` when omitted.
        smooth_window_len : int, optional
            Forwarded to ``publib.smooth`` as ``window_len``; ``0`` when
            omitted.
        zero_thresh : float
            Stored on the instance unchanged.
        scale_train_ts : optional
            Object exposing ``minV`` and ``maxV``. When given, these two
            values are passed to ``publib.scale`` as additional arguments.

        Raises
        ------
        ValueError
            If ``ts`` is empty, if ``end - start < 2``, or if the selected
            slice contains fewer than two samples.
        """
        # Runtime message reused for both length checks (text kept as-is).
        too_short_msg = "Invalid input: minimal length of time series should be larger than 2!"

        # configure logging
        if logger_level is None:
            logger_level = logging.INFO
        init_logger = logging.getLogger("SSSTSR_Class.__init__")
        init_logger.setLevel(logger_level)
        init_logger.addHandler(publib.console_handle)

        # resolve defaults and validate the input
        if ts is None:
            ts = SSSTSR_Class.__default_ts
        if len(ts) == 0:
            raise ValueError("Invalid input: time series should not be empty!")

        if start is None:
            start = 0
        if end is None:
            # a[0:end] covers a[0] .. a[end-1], so len(ts) selects everything
            end = len(ts)
        if end - start < 2:
            raise ValueError(too_short_msg)

        if max_err is None:
            max_err = SSSTSR_Class.__default_max_err
        if smooth_window_len is None:
            smooth_window_len = 0

        # The difference check above cannot see out-of-range bounds (e.g. a
        # start beyond len(ts)), so validate the slice that is actually used.
        series = np.array(ts[start:end])
        if len(series) < 2:
            raise ValueError(too_short_msg)

        # initialize instance variables
        self.__loggerLevel = logger_level
        self.__ts = series                      # time series
        self.__lenTS = len(series)              # length of time series
        self.__segTS = []                       # segments
        self.__seg_fit_list = []                # the fitting result of segments
        self.__smooth_ts = publib.smooth(series, window_len=smooth_window_len)

        if scale_train_ts is None:
            self.__normTS = publib.scale(self.__smooth_ts)
        else:
            # scale with the bounds taken from the training series
            self.__normTS = publib.scale(self.__smooth_ts, scale_train_ts.minV, scale_train_ts.maxV)

        # extrema of the raw (unsmoothed) slice
        self.minV = np.min(series)
        self.maxV = np.max(series)

        self.__maxErr = max_err                 # max err used in bottom up
        self.__zero_thresh = zero_thresh

        self.__symbollist = []
        self.__word = ""

        self.enhance_seg = []
예제 #2
0
def idijktest_euclidean(mat_path='/home/zhe/Dropbox/TH/DDSC/Matlab/data/weatherstation_east-Windspeed'):
    """Plot the nearest Euclidean matches of a query window in a MATLAB series.

    The first row of the ``'values'`` variable in the MATLAB file is smoothed
    with ``publib.smooth``; samples ``[0:5000]`` form the search sequence and
    samples ``[5200:5500]`` form the query. ``bruteforce_sliding_windows`` is
    asked for the 3 best candidates using ``euclidean_dist`` with a window
    interval of 10, and the query plus each candidate are drawn in a stacked
    figure that is shown in blocking mode.

    Parameters
    ----------
    mat_path : str, optional
        Path handed to ``scipy.io.loadmat``. Defaults to the location that
        was previously hard-coded in this function.
    """
    smooth_window_len = 5
    n = 3
    win_interval = 10

    # load idijk data and keep only the value row
    mat = scipy.io.loadmat(mat_path)
    data = publib.smooth(mat['values'][0], window_len=smooth_window_len)

    sequence = np.array(data[0:5000].tolist())
    query = np.array(data[5200:5500].tolist())
    query_len = len(query)

    knn_candidates_euclidean = bruteforce_sliding_windows(query, sequence, win_interval, n, euclidean_dist)

    # one row for the query plus one row per returned candidate
    n_rows = len(knn_candidates_euclidean) + 1

    # plot matching results
    fig = plt.figure()
    ax = plt.subplot(n_rows, 1, 1)
    ax.plot(query, linewidth=1, color='r')
    ax.set_xlim(0, query_len)
    ax.set_title("query time series" )

    for i, candidate in enumerate(knn_candidates_euclidean):
        # each candidate is indexed as (start position, distance)
        match_start = candidate[0]
        dist = candidate[1]
        match_end = match_start + query_len

        ax = plt.subplot(n_rows, 1, i + 2)
        ax.plot(range(match_start, match_end), sequence[match_start:match_end], linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
        ax.set_xlim(match_start, match_end)

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
예제 #3
0
def test_tno_data_matching(data, smooth_win=-1):
    """Time and plot a Euclidean k-NN subsequence search on ``data``.

    The window '2011-10-17 14:31' .. '2011-10-20 02:30' of ``data`` is the
    query and '2011-10-20 09:00' .. '2011-12-20 09:00' is the sequence that
    is searched. ``bruteforce_sliding_windows`` is called with
    ``euclidean_dist``, ``k = 3`` and a window interval of 60; the elapsed
    time is printed and the query plus every returned candidate are plotted
    in one stacked figure. No normalisation or resampling is applied.

    Parameters
    ----------
    data :
        Object sliced by timestamp strings and exposing ``index``,
        ``tolist()``, ``head()`` and ``plot()`` -- presumably a pandas
        Series with a datetime index (TODO confirm against the caller).
    smooth_win : int, optional
        When greater than 0, both query and sequence are passed through
        ``publib.smooth`` with this value before matching. The default of
        ``-1`` disables smoothing.
    """
    print(type(data.index))

    sequence_ts = data['2011-10-20 09:00':'2011-12-20 09:00']
    sequence = np.array(sequence_ts.tolist())
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    print(query_ts.head(20))
    print(type(query_ts.index))
    print(len(query_ts))

    query = np.array(query_ts.tolist())
    print(len(query))

    if smooth_win > 0:
        query = publib.smooth(query, smooth_win)
        # re-wrap so the query can still be plotted against its time index
        query_ts = pd.Series(query, index=query_ts.index)
        sequence = publib.smooth(sequence, smooth_win)

    k = 3
    win_interval = 60

    start_t = time.time()
    knn_candidates_euclidean = bruteforce_sliding_windows(query, sequence, win_interval, k, euclidean_dist)
    time_euclidean = time.time() - start_t
    print("Euclidean takes: " + str(time_euclidean))

    # one row for the query plus one row per returned candidate
    n_rows = len(knn_candidates_euclidean) + 1
    query_len = len(query)

    xfmt = md.DateFormatter('%H')
    plt.figure(1)
    ax = plt.subplot(n_rows, 1, 1)
    plt.title("query time series: from %s to %s" % (str(query_ts.index[0]), str(query_ts.index[-1])), fontsize=10)
    ax.xaxis.set_major_formatter(xfmt)
    query_ts.plot(color='g')

    for i, candidate in enumerate(knn_candidates_euclidean):
        ax = plt.subplot(n_rows, 1, i + 2)
        # each candidate is indexed as (start position, distance)
        pos = candidate[0]
        dist = candidate[1]
        match_end = pos + query_len

        ax.plot(range(pos, match_end), sequence[pos:match_end], linewidth=1, color='b')
        ax.set_xlim(pos, match_end)
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
    plt.show()

    print("length of query ts: " + str(len(query)))
    print("length of sequence: " + str(len(sequence)))