def fit_shape(y):
    """Fit the best-matching library shape to the series *y*.

    Returns (fitted_series, likelihood, shape_index, direction), where
    direction is one of 'inc', 'dec' or 'flat'.  Relies on the
    module-level globals shape_lib, shape_lib_len, shape_cache,
    thresh_flat and publib.
    """
    y_mean = np.mean(y)
    # Series too short to carry a meaningful shape: report it as flat.
    if len(y) < 5:
        return np.ones(len(y)) * y_mean, 0, shape_lib_len, "flat"

    n = len(y)
    # Fetch (or build and cache) the library shapes sampled at n points.
    if n in shape_cache:
        sampled = shape_cache[n]
    else:
        grid = np.linspace(0, 1, n)
        sampled = np.empty([shape_lib_len, n])
        for idx in range(shape_lib_len):
            sampled[idx] = shape_lib[idx](grid)
        shape_cache[n] = sampled

    # Standardize y; guard against a near-zero standard deviation.
    y_std = publib.std(y)
    if y_std < 1e-10:
        y_std = 1
    y_norm = (y - y_mean) / y_std

    # Linear least-squares coefficient of every shape against y_norm.
    # TRICK: skip computing the per-row squared norm of each shape;
    # the library shapes are built so the denominator is simply n.
    numerator = np.dot(sampled, y_norm)
    denominator = np.ones(shape_lib_len) * len(y)
    theta = numerator / denominator

    # Maximum-likelihood step: squared residual of each scaled shape
    # versus the normalized series (matches the original MATLAB code).
    err = np.sum(np.power((sampled.T * theta).T - y_norm, 2), axis=1)
    likelihood = -1 * err.min()
    shape_ind = err.argmin()

    # TRICK: a tiny |theta| means no shape explains y -> flat (see paper).
    if abs(theta[shape_ind]) < thresh_flat:
        shape_ind = shape_lib_len
        shape_dir = 'flat'
        fit_y = np.zeros(len(y))
        likelihood = -1 * np.var(y)
    elif theta[shape_ind] < 0:
        shape_dir = 'dec'
        fit_y = theta[shape_ind] * sampled[shape_ind]
    else:
        shape_dir = 'inc'
        fit_y = theta[shape_ind] * sampled[shape_ind]

    # Undo the normalization before handing the fitted series back.
    fit_y = fit_y * y_std + y_mean
    return fit_y, likelihood, shape_ind, shape_dir
def gap_uniform(lenTS, minSize = None, num_round = None):
    """Estimate the weighted GAP curve from uniform-random time series.

    See the paper: the expected (null-model) segmentation cost is
    estimated as the mean over `num_round` uniformly random series of
    length `lenTS`, each segmented bottom-up with minimum segment size
    `minSize`.

    Returns the weighted GAP values, one per candidate segment count.
    """
    if minSize is None:
        minSize = 2
    # See paper: error is estimated from the mean of 10 uniform series.
    if num_round is None:
        num_round = 10
    random.seed(time.time())

    # Estimate error from uniform random time series.
    mat = np.array([])
    for i in range(num_round):
        # Generate a uniform series in the range [0, 1].
        uniformTS = np.array([random.uniform(0, 1) for _ in range(lenTS)])
        # bottomUp with k=1 iterates every possible number of segments.
        _, uniformSegCost, _ = bottomup_seg(ts=uniformTS,
                                            max_err=sys.float_info.max,
                                            k=1, init_size=minSize)
        # The candidate K must lie in [1, len(uniformTS) // minSize - 1];
        # report (rather than assert) a mismatched cost-vector length.
        if len(uniformSegCost) != (len(uniformTS) // minSize - 1):
            # BUG FIX: was a Python 2-only `print a, b` statement; this
            # form emits the identical text on both Python 2 and 3.
            print("%d %d" % (len(uniformSegCost),
                             len(uniformTS) // minSize - 1))
        # First pass (i == 0): give the empty array the right column
        # count so vstack can grow it one row per round.
        mat = mat.reshape(i, len(uniformSegCost))
        mat = np.vstack([mat, np.array(uniformSegCost)])

    # Weighted GAP with the smoothing factor s_k (see paper).
    mat = np.log(mat)
    # BUG FIX: `1/num_round` was integer division under Python 2, so the
    # sqrt(1 + 1/B) correction term silently collapsed to sqrt(1).
    sk = (num_round - 1) * publib.std(mat, axis=0) / num_round \
        * np.sqrt(1 + 1.0 / num_round)
    weightGAP = np.mean(mat, axis=0) - sk
    # Result of num_round uniform random experiments.
    return weightGAP