Esempio n. 1
0
    def build_seg(self, minSize = None, plotSwitch = None):
        """Segment the normalized time series and cache the result on the instance.

        Calls ``bottomup_seg`` on ``self.__normTS`` with ``max_err`` set to
        ``self.__zero_thresh`` and ``k=1``.  The returned segments are stored
        in ``self.__segTS`` and the returned fits in ``self.__seg_fit_list``.

        Args:
            minSize: forwarded to ``bottomup_seg`` as ``init_size``;
                ``None`` means 2.
            plotSwitch: forwarded to ``bottomup_seg`` as ``PLOT_DEBUG``; when
                truthy the fitted segments are also plotted in a
                non-blocking window.  ``None`` means ``False``.
        """
        debug_plot = False if plotSwitch is None else plotSwitch
        init_size = 2 if minSize is None else minSize

        # NOTE: the number of segments is not taken from GAP_decide_k here;
        # only the maximum segment error (self.__zero_thresh) is used.
        # NOTE: the normalized series must be segmented, not the raw one
        # (segmenting the raw series was an earlier bug).
        segments, _, fits = bottomup_seg(
            ts=self.__normTS,
            max_err=self.__zero_thresh,
            init_size=init_size,
            k=1,
            PLOT_DEBUG=debug_plot,
        )

        if debug_plot:
            plot_segts_fit(self.__normTS, seg_ts=segments, seg_fit=fits)
            plt.show(block=False)

        self.__seg_fit_list = fits
        self.__segTS = segments
Esempio n. 2
0
def gap_uniform(lenTS, minSize = None, num_round = None):
    """Estimate the uniform-noise reference term used by the GAP criterion.

    Draws ``num_round`` random series of length ``lenTS`` with values taken
    uniformly from [0, 1], segments each one with ``bottomup_seg``
    (``k=1``, ``max_err=sys.float_info.max``) and combines the logarithms of
    the returned cost curves.

    Args:
        lenTS: number of samples in each random series.
        minSize: forwarded to ``bottomup_seg`` as ``init_size``;
            ``None`` means 2.
        num_round: number of random series to generate; ``None`` means 10
            (see paper: the error is estimated from the mean of 10 uniformly
            random time series).

    Returns:
        The column-wise mean of the log cost curves minus the spread term
        ``sk``; one value per entry of the cost curve returned by
        ``bottomup_seg``.
    """
    if minSize is None:
        minSize = 2
    if num_round is None:
        num_round = 10

    random.seed(time.time())

    # One cost curve per random series; stacked into a matrix after the loop.
    cost_rows = []
    for _round in range(num_round):
        # generate uniform time series in the range of [0,1]
        uniformTS = np.array([random.uniform(0, 1) for _ in range(lenTS)])

        # bottomUp: k=1 with an unbounded max_err to iterate every possible
        # number of segments
        _, uniformSegCost, _ = bottomup_seg(ts=uniformTS, max_err=sys.float_info.max, k=1, init_size=minSize)

        # The cost curve is expected to have len(ts) // minSize - 1 entries;
        # report (but do not fail on) any deviation.
        expected_len = len(uniformTS) // minSize - 1
        if len(uniformSegCost) != expected_len:
            # Single pre-formatted argument: prints "a b" on Python 2 and 3.
            print("%s %s" % (len(uniformSegCost), expected_len))

        cost_rows.append(np.array(uniformSegCost))

    # np.vstack raises ValueError if the rounds produced curves of different
    # lengths, which keeps the implicit consistency check of the old
    # per-iteration reshape.
    mat = np.vstack(cost_rows) if cost_rows else np.array([])

    # calculate the weighted GAP and get smoothing factor GAP
    mat = np.log(mat)
    # 1.0 / num_round: with two ints Python 2 floor-divides, which silently
    # turned the sqrt(1 + 1/B) correction into sqrt(1) for any num_round > 1.
    sk = (num_round - 1) * publib.std(mat, axis=0) / num_round * np.sqrt(1 + 1.0 / num_round)
    weightGAP = np.mean(mat, axis=0) - sk

    return weightGAP
Esempio n. 3
0
    def GAP_decide_k(self, minSize):
        """Choose a number of segments from the GAP curve of the normalized series.

        The GAP curve is the log segmentation cost of ``self.__normTS`` minus
        the uniform-noise reference returned by ``gap_uniform``.  Its negated
        first difference is compared against ``self.__zero_thresh``:

        * if at least one entry is >= the threshold, the result is the index
          of the last such entry plus one, i.e. the point after which every
          first difference is "nearly zero";
        * otherwise the result is the argmax of the negated second
          difference plus one.

        Args:
            minSize: forwarded as ``init_size`` to ``bottomup_seg`` and as
                ``minSize`` to ``gap_uniform``.

        Returns:
            The selected number of segments.
        """
        log = logging.getLogger("SSSTSR_Class.GAP_decide_k")
        log.setLevel(self.__loggerLevel)
        log.addHandler(publib.console_handle)

        # Reference curve obtained from uniform random series of equal length.
        reference_curve = gap_uniform(len(self.__normTS), minSize)

        # Cost curve of the actual data.
        # TRICK: the normalized series is used as input.
        _, series_cost, _ = bottomup_seg(max_err=sys.float_info.max, ts=self.__normTS, k=1, init_size=minSize)

        # Every cost entry is used; no cut-off for near-perfect fits with
        # very tiny costs is applied.
        idx = range(len(series_cost))
        gap_curve = np.log(series_cost[idx]) - reference_curve[idx]

        # Negated first and second differences of the GAP curve.
        first_diff = -np.diff(gap_curve)
        second_diff = -np.diff(first_diff)

        # NOTE: self.__zero_thresh is a very important internal parameter.
        # TODO: find it automatically?
        significant = np.nonzero(first_diff >= self.__zero_thresh)[0]

        if significant.size == 0:
            # Every first difference is already below the threshold.
            # TRICK: index 0 of the second difference actually means 1.
            return np.argmax(second_diff) + 1

        # One past the last first difference that is still significant.
        return significant[-1] + 1