def build_seg(self, minSize=None, plotSwitch=None):
    """Segment the normalized time series and cache the result on the instance.

    Runs ``bottomup_seg`` on ``self.__normTS`` with ``self.__zero_thresh`` as
    the maximum error and ``k=1``. The GAP-based choice of ``k``
    (``GAP_decide_k``) is not used here; only the error threshold is passed.

    Parameters:
        minSize: value forwarded as ``init_size``; ``None`` means 2.
        plotSwitch: when truthy, forwarded as ``PLOT_DEBUG`` and the
            segmentation is additionally drawn with ``plot_segts_fit``;
            ``None`` means ``False``.

    Side effects:
        Stores the first and third values returned by ``bottomup_seg`` in
        ``self.__segTS`` and ``self.__seg_fit_list`` respectively.

    Returns:
        None.
    """
    init_size = 2 if minSize is None else minSize
    show_plots = False if plotSwitch is None else plotSwitch

    # NOTE: the segmentation must be fed the normalized series, not the raw one.
    segmented, _, fit_list = bottomup_seg(
        ts=self.__normTS,
        max_err=self.__zero_thresh,
        init_size=init_size,
        k=1,
        PLOT_DEBUG=show_plots,
    )

    if show_plots:
        plot_segts_fit(self.__normTS, seg_ts=segmented, seg_fit=fit_list)
        # Non-blocking so the caller keeps running while the figure is open.
        plt.show(block=False)

    self.__seg_fit_list = fit_list
    self.__segTS = segmented
def gap_uniform(lenTS, minSize=None, num_round=None):
    """Estimate the reference segmentation-cost curve from uniform random series.

    For each of ``num_round`` rounds a series of ``lenTS`` samples drawn
    uniformly from [0, 1] is generated and passed to ``bottomup_seg`` with
    ``k=1`` and an unbounded ``max_err``. The second value it returns (the
    per-step segmentation cost) is collected as one row of a matrix. The
    result is computed column-wise on the natural log of that matrix as

        mean - (num_round - 1) / num_round * std * sqrt(1 + 1 / num_round)

    Parameters:
        lenTS: length of each random series.
        minSize: value forwarded as ``init_size``; ``None`` means 2.
        num_round: number of random series to average over; ``None`` means 10.

    Returns:
        1-D array with one entry per column of the cost matrix.

    Raises:
        ValueError: if ``num_round`` is smaller than 1, or if the rounds
            produce cost sequences of different lengths.
    """
    if minSize is None:
        minSize = 2
    # See paper: the segmentation error is estimated as the mean over 10
    # uniformly random time series.
    if num_round is None:
        num_round = 10
    if num_round < 1:
        raise ValueError("num_round must be at least 1, got %r" % (num_round,))

    random.seed(time.time())

    # Number of cost entries expected from each round; a mismatch is reported
    # on stdout but does not stop the computation.
    expected_len = lenTS // minSize - 1

    cost_rows = []
    for _round in range(num_round):
        # Uniform time series in the range [0, 1].
        uniformTS = np.array([random.uniform(0, 1) for _sample in range(lenTS)])
        # k=1 with unbounded max_err: per the original author's note this
        # walks through every possible number of segments.
        _, uniformSegCost, _ = bottomup_seg(
            ts=uniformTS, max_err=sys.float_info.max, k=1, init_size=minSize
        )
        if len(uniformSegCost) != expected_len:
            # Single pre-formatted argument keeps the output identical under
            # both the Python 2 print statement and the Python 3 function.
            print("%s %s" % (len(uniformSegCost), expected_len))
        cost_rows.append(np.array(uniformSegCost))

    # One row per round, stacked once instead of re-copying every iteration.
    mat = np.log(np.vstack(cost_rows))

    # 1.0 forces true division: under Python 2 the integer expression
    # 1/num_round is 0, which silently reduced the sqrt factor to 1.
    sk = (num_round - 1) * publib.std(mat, axis=0) / num_round * np.sqrt(1 + 1.0 / num_round)
    weightGAP = np.mean(mat, axis=0) - sk
    return weightGAP
def GAP_decide_k(self, minSize):
    """Pick the number of segments K from the GAP curve of the normalized series.

    The GAP curve is ``log(cost of self.__normTS) - gap_uniform(...)``, where
    the cost is the second value returned by ``bottomup_seg`` (``k=1``,
    unbounded ``max_err``). K is then chosen from the negated first and second
    differences of that curve:

    * if any first-difference value is >= ``self.__zero_thresh``, K is the
      position of the last such value plus one;
    * otherwise K is the position of the largest second-difference value
      plus one.

    Parameters:
        minSize: forwarded to ``gap_uniform`` and, as ``init_size``, to
            ``bottomup_seg``.

    Returns:
        The chosen K (1-based).
    """
    # The logger is configured here, but no message is currently emitted.
    logger = logging.getLogger("SSSTSR_Class.GAP_decide_k")
    logger.setLevel(self.__loggerLevel)
    logger.addHandler(publib.console_handle)

    # Reference curve obtained from uniform random time series.
    reference_curve = gap_uniform(len(self.__normTS), minSize)

    # TRICK: the normalized time series is the input here, not the raw one.
    _, series_cost, _ = bottomup_seg(
        max_err=sys.float_info.max, ts=self.__normTS, k=1, init_size=minSize
    )

    # Every cost entry is used; no cut-off for near-perfect fits is applied.
    keep = range(len(series_cost))
    gap_curve = np.log(series_cost[keep]) - reference_curve[keep]

    # Negated first and second differences of the GAP curve.
    first_diff = -np.diff(gap_curve)
    second_diff = -np.diff(first_diff)

    # NOTE: this threshold is a very important internal parameter. Ideally the
    # first difference becomes zero from K onwards, so K is taken right after
    # the last position that is still at or above the threshold.
    threshold = self.__zero_thresh
    at_or_above = np.nonzero(first_diff >= threshold)[0]

    if at_or_above.size == 0:
        # Everything is already "nearly zero": fall back to the peak of the
        # second difference. +1 turns the 0-based index into a count.
        return np.argmax(second_diff) + 1

    return at_or_above[-1] + 1