def fit_shape(y):
    """Fit the best-matching template from the shape library to y.

    Returns (fit_y, likelihood, shape_ind, shape_dir).
    """
    mean_y = np.mean(y)

    # too short to fit a meaningful shape: fall back to a flat fit
    if len(y) < 5:
        return np.ones(len(y)) * mean_y, 0, shape_lib_len, 'flat'

    # load or build shape templates with the same length as y (memoized by n)
    n = len(y)
    if n in shape_cache:
        shapes = shape_cache[n]
    else:
        x = np.linspace(0, 1, n)
        shapes = np.empty((shape_lib_len, n))

        # evaluate every template in the shape library on [0, 1]
        for i in range(shape_lib_len):
            shapes[i] = shape_lib[i](x)

        # memoize for subsequent series of the same length
        shape_cache[n] = shapes

    # normalize y to zero mean and unit variance; guard against a
    # near-constant series whose standard deviation is effectively zero
    std_y = publib.std(y)
    if std_y < 1e-10:
        std_y = 1
    normal_y = (y - mean_y) / std_y

    # linear least-squares estimate of the scale coefficient for each template
    numerator = np.dot(shapes, normal_y)
    # TRICK: we do not compute the true denominator np.sum(shapes * shapes, axis=1),
    # which saves time (see the note below)
    denominator = np.ones(shape_lib_len) * len(y)
    theta = numerator / denominator
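    # Derivation: least squares gives theta = <s, y_n> / <s, s> for a template
    # s, so using len(y) as the denominator presumes each template is
    # normalized with <s, s> == n on the sample grid.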
    
    # maximum-likelihood step: squared error of each scaled template against
    # the normalized series. The original MATLAB code:
    #   err = sum((MB.shapes.*repmat(theta,1,length(yy)) - repmat(yyn,size(MB.shapes,1),1)).^2, 2);
    err = np.sum((theta[:, np.newaxis] * shapes - normal_y) ** 2, axis=1)

    # pick the template with the smallest squared error
    likelihood, shape_ind = -err.min(), err.argmin()
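    # (Minimizing the squared error equals maximizing the likelihood under an
    # i.i.d. Gaussian noise model, which is why -err is used as the score.)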

    # TRICK: a flat shape is detected by a small |theta|, see the paper
    if abs(theta[shape_ind]) < thresh_flat:
        shape_ind = shape_lib_len
        shape_dir = 'flat'
        fit_y = np.zeros(len(y))
        likelihood = -np.var(y)
    else:
        shape_dir = 'dec' if theta[shape_ind] < 0 else 'inc'
        fit_y = theta[shape_ind] * shapes[shape_ind]

    # map the fitted curve back to the original scale
    fit_y = fit_y * std_y + mean_y

    return fit_y, likelihood, shape_ind, shape_dir
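
# Example usage of fit_shape (a minimal sketch; the series below is
# hypothetical, and the module-level globals shape_lib, shape_cache,
# shape_lib_len and thresh_flat referenced above must already be populated):
#
#   y = np.array([0.2, 0.5, 1.1, 1.9, 3.2, 4.8])
#   fit_y, likelihood, shape_ind, shape_dir = fit_shape(y)
#   print(shape_dir)  # one of 'inc', 'dec' or 'flat'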


def gap_uniform(lenTS, minSize=None, num_round=None):
    if minSize is None:
        minSize = 2
    # see the paper: we estimate the segmentation error as the mean over
    # num_round uniformly random time series (10 by default)
    if num_round is None:
        num_round = 10

    random.seed(time.time())

    # estimate segmentation error from uniform random time series
    rows = []
    for _ in range(num_round):
        # generate a uniform random time series in the range [0, 1]
        uniformTS = np.array([random.uniform(0, 1) for _ in range(lenTS)])

        # bottom-up segmentation: init_size=minSize and k=1 iterate every
        # possible number of segments
        _, uniformSegCost, _ = bottomup_seg(ts=uniformTS, max_err=sys.float_info.max, k=1, init_size=minSize)

        # the number of candidate K values must equal len(uniformTS) // minSize - 1
        if len(uniformSegCost) != (len(uniformTS) // minSize - 1):
            print(len(uniformSegCost), (len(uniformTS) // minSize - 1))

        rows.append(np.array(uniformSegCost))

    # stack the num_round cost curves into a (num_round, K) matrix
    mat = np.array(rows)

    # weighted GAP: mean of the log costs minus the spread correction sk
    # (cf. the gap-statistic correction s_k = sd_k * sqrt(1 + 1/B), with
    # B = num_round simulation rounds)
    mat = np.log(mat)
    sk = (num_round - 1) * publib.std(mat, axis=0) / num_round * np.sqrt(1 + 1 / num_round)
    weightGAP = np.mean(mat, axis=0) - sk

    # return the weighted GAP curve, one value per candidate segment count
    return weightGAP
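

# Example usage of gap_uniform (a minimal sketch; the length below is
# hypothetical):
#
#   gap_curve = gap_uniform(lenTS=200)
#   # gap_curve holds one weighted-GAP value per candidate segment count,
#   # to be compared against the log segmentation cost of the real series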