コード例 #1
0
def star_model_predict(Xpred, coefs, descr):
    """Evaluate a structured additive regression model on new input data.

    For every term in `descr` the corresponding (tensor-product) B-spline
    basis is evaluated on `Xpred`; the bases are stacked column-wise and
    multiplied with `coefs`.

    Parameters:
    -----------
    Xpred : array              - np.array of the input data; columns are selected by the
                                 1-based dimension numbers given in the term types.
    coefs : array              - Coefficients for the combined basis matrix (used as B @ coefs).
    descr : tuple of tuples    - Model structure; each entry is read as
                                 (type, nr_splines, constraints, lam_c, knot_types), e.g.
                                 ("s(1)", 100, "inc", 6000, "e") or
                                 ("t(1,2)", (12,10), ("none", "none"), (6000,6000), ("e", "e")).
                                 Only type, nr_splines and knot_types are used for prediction.
                                 The dimension is read from a single character of the type
                                 string, so only dimensions 1-9 can be addressed.

    Returns:
    --------
    d : dict      - Returns a dictionary with the following key-value pairs:
                        basis=B (combined basis matrix),
                        y=B@coefs (prediction),
                        X=Xpred

    Raises:
    -------
    ValueError    - If a term type starts with neither "s" nor "t".
    """
    terms = tuple(descr)

    # Reject unsupported term types before any basis is built. Merely printing
    # a warning would let the loop below append an undefined or stale basis.
    for term in terms:
        if not term[0].startswith(("s", "t")):
            raise ValueError(
                "Only B-splines (s) and tensor-product B-splines (t) are supported! "
                f"Got term type {term[0]!r}."
            )

    BS, TS = Bspline(), Bspline()
    basis = []

    for term in terms:
        type_, nr_splines, knot_types = term[0], term[1], term[4]
        print("Process ", type_)
        if type_.startswith("s"):
            # "s(d)": B-spline basis on column d (1-based)
            dim = int(type_[2])
            B = BS.basismatrix(X=Xpred[:, dim-1], nr_splines=nr_splines, l=3, knot_type=knot_types)["basis"]
        else:
            # "t(d1,d2)": tensor-product basis on columns d1 and d2 (1-based)
            dim1, dim2 = int(type_[2]), int(type_[4])
            B = TS.tensorproduct_basismatrix(X=Xpred[:, [dim1-1, dim2-1]], nr_splines=nr_splines, l=(3, 3), knot_type=knot_types)["basis"]
        basis.append(B)

    # create combined basis matrix
    B = np.concatenate(basis, axis=1)
    y = B @ coefs

    return dict(basis=B, y=y, X=Xpred)
コード例 #2
0
def _star_smoothness_penalty(smoothness, lambdas):
    """Build the block-diagonal smoothness penalty, one block per model term."""
    blocks = []
    for lam, D in zip(lambdas, smoothness):
        if isinstance(D, tuple):
            # tensor-product term: one penalty matrix per direction
            blocks.append(lam * (D[0].T @ D[0] + D[1].T @ D[1]))
        else:
            blocks.append(lam * (D.T @ D))
    return block_diag(*blocks)


def _star_constraint_penalty(constr, weights, descr):
    """Build the block-diagonal weighted constraint penalty, scaled by each term's lam_c."""
    blocks = []
    for i, D in enumerate(constr):
        lam_c = descr[i][3]
        if isinstance(D, tuple):
            # tensor-product term: lam_c and the weights are given per direction
            blocks.append(
                lam_c[0] * (D[0].T @ np.diag(weights[i][0]) @ D[0])
                + lam_c[1] * (D[1].T @ np.diag(weights[i][1]) @ D[1])
            )
        else:
            blocks.append(lam_c * (D.T @ np.diag(weights[i]) @ D))
    return block_diag(*blocks)


def star_model(descr, X, y):
    """Fit a structured additive regression model using B-splines to the data in (X,y).

    Each term is first fitted on its own (smoothing parameter chosen via GCV)
    to obtain initial constraint weights. Afterwards the combined model is
    re-fitted by penalized least squares,
        coef = pinv(B'B + S + K) B'y,
    and the constraint weights are re-evaluated until they no longer change
    or the iteration limit (200) is exceeded.

    Parameters:
    -----------
    descr : tuple of tuples    - Describes the model structure, e.g.
                                 descr = ( ("s(1)", 100, "inc", 6000, "e"),
                                           ("t(1,2)", (12,10), ("none", "none"), (6000,6000), ("e", "e")) ),
                                 describing a model using a P-spline with increasing constraint and 100
                                 basis functions for dimension 1 and a tensor-product P-spline without
                                 constraints using 12 and 10 basis functions for the respective dimension.
                                 Each entry is read as (type, nr_splines, constraints, lam_c, knot_types);
                                 lam_c scales the constraint penalty (one value per direction for
                                 tensor-product terms). The dimension is read from a single character
                                 of the type string, so only dimensions 1-9 can be addressed.
    X : array                  - np.array of the input data, shape (n_samples, n_dim)
    y : array                  - np.array of the target data, shape (n_samples, )

    Returns:
    --------
    d : dict      - Returns a dictionary with the following key-value pairs:
                        basis=B,
                        smoothness=S,
                        constraint=K,
                        opt_lambdas=optimal_lambdas,
                        coef_=coef_pls,
                        weights=weights

    Raises:
    -------
    ValueError    - If a term type starts with neither "s" nor "t".
    """
    # Reject unsupported term types before any fitting is done. Merely printing
    # a warning would let the loop below append undefined or stale matrices.
    for term in descr:
        if not term[0].startswith(("s", "t")):
            raise ValueError(
                "Only B-splines (s) and tensor-product B-splines (t) are supported! "
                f"Got term type {term[0]!r}."
            )

    BS, TS = Bspline(), Bspline()
    coefs = []
    basis, smoothness = [], []
    constr, optimal_lambdas = [], []
    weights, weights_compare = [], []

    for term in descr:
        type_, nr_splines, constraints, knot_types = term[0], term[1], term[2], term[4]
        print("Process ", type_)
        if type_.startswith("s"):
            dim = int(type_[2])
            x = X[:, dim-1]
            B = BS.basismatrix(X=x, nr_splines=nr_splines, l=3, knot_type=knot_types)["basis"]
            Ds = mm(nr_splines, constraint="smooth", dim=dim-1)
            Dc = mm(nr_splines, constraint=constraints, dim=dim-1)
            lam = BS.calc_GCV(X=x, y=y, nr_splines=nr_splines, l=3, knot_type=knot_types, nr_lam=50)["best_lambda"]
            coef_pls = BS.fit_Pspline(X=x, y=y, nr_splines=nr_splines, l=3, knot_type=knot_types, lam=lam)["coef_"]
            W = check_constraint(coef=coef_pls, constraint=constraints, y=y, B=B)
            weights.append(W)
            weights_compare += list(W)
        else:
            print("Constraint = ", constraints)
            dim1, dim2 = int(type_[2]), int(type_[4])
            x = X[:, [dim1-1, dim2-1]]
            B = TS.tensorproduct_basismatrix(X=x, nr_splines=nr_splines, l=(3, 3), knot_type=knot_types)["basis"]
            Ds1 = mm(nr_splines, constraint="smooth", dim=dim1-1)
            Ds2 = mm(nr_splines, constraint="smooth", dim=dim2-1)
            Dc1 = mm(nr_splines, constraint=constraints[0], dim=dim1-1)
            Dc2 = mm(nr_splines, constraint=constraints[1], dim=dim2-1)
            lam = TS.calc_GCV_2d(X=x, y=y, nr_splines=nr_splines, l=(3, 3), knot_type=knot_types, nr_lam=50)["best_lambda"]
            coef_pls = TS.fit_Pspline(X=x, y=y, nr_splines=nr_splines, l=(3, 3), knot_type=knot_types, lam=lam)["coef_"]
            W1 = check_constraint_dim1(coef_pls, nr_splines=nr_splines, constraint=constraints[0])
            W2 = check_constraint_dim2(coef_pls, nr_splines=nr_splines, constraint=constraints[1])
            weights.append((W1, W2))
            weights_compare += list(W1) + list(W2)
            # tuples mark tensor-product terms for the penalty builders
            Ds = (Ds1, Ds2)
            Dc = (Dc1, Dc2)

        basis.append(B)
        smoothness.append(Ds)
        constr.append(Dc)
        optimal_lambdas.append(lam)
        coefs.append(coef_pls)

    # Result of the separate per-term fits; returned as-is only if the
    # iteration below does not run at all.
    coef_pls = np.concatenate(coefs)
    # create combined basis matrix
    B = np.concatenate(basis, axis=1)
    # create combined smoothness matrix
    S = _star_smoothness_penalty(smoothness, optimal_lambdas)
    # create combined constraint matrix
    K = _star_constraint_penalty(constr, weights, descr)

    weights_old = [0]*len(weights_compare)
    iterIdx = 1
    BtB = B.T @ B
    Bty = B.T @ y

    # Iterate till no change in weights
    # NOTE(review): the comparison assumes check_constraint_full returns the
    # flat weights as a plain list, like the initial weights_compare - confirm.
    while weights_compare != weights_old:
        weights_old = weights_compare
        coef_pls = np.linalg.pinv(BtB + S + K) @ (Bty)
        weights, weights_compare = check_constraint_full(coef_=coef_pls, descr=descr, basis=B, y=y)

        print(f" Iteration {iterIdx} ".center(50, "="))
        print(f"MSE = {mean_squared_error(y, B@coef_pls).round(7)}".center(50, "-"))

        print("Calculate new constraint matrix K".center(50,"-"))
        K = _star_constraint_penalty(constr, weights, descr)

        if iterIdx > 200:
            print("breaking")
            break

        iterIdx += 1

    print("Iteration Finished".center(50, "#"))
    return dict(basis=B, smoothness=S, constraint=K, opt_lambdas=optimal_lambdas, coef_=coef_pls, weights=weights)