Code example #1 (score: 0)
File: test_pickling.py  Project: yu-9824/ngboost
def fixture_learners_data(breast_cancer_data, boston_data,
                          boston_survival_data):
    """Fit one small model per NGBoost estimator flavour.

    Returns:
        A list of (fitted model, X data, predictions on that X data)
        tuples covering classification, regression, survival, and
        multivariate-normal regression.
    """
    fitted = []

    # Classification on the breast-cancer training split.
    X_cls, _, y_cls, _ = breast_cancer_data
    clf = NGBClassifier(verbose=False, n_estimators=10)
    clf.fit(X_cls, y_cls)
    fitted.append((clf, X_cls, clf.predict(X_cls)))

    # Plain regression on the Boston training split.
    X_reg, _, y_reg, _ = boston_data
    reg = NGBRegressor(verbose=False, n_estimators=10)
    reg.fit(X_reg, y_reg)
    fitted.append((reg, X_reg, reg.predict(X_reg)))

    # Survival regression takes times T and event indicators E separately.
    X_surv, _, T_surv, E_surv, _ = boston_survival_data
    surv = NGBSurvival(verbose=False, n_estimators=10)
    surv.fit(X_surv, T_surv, E_surv)
    fitted.append((surv, X_surv, surv.predict(X_surv)))

    # 2-D multivariate-normal regression on the stacked (T, E) targets.
    mvn = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
    mvn.fit(X_surv, np.vstack([T_surv, E_surv]).T)
    fitted.append((mvn, X_surv, mvn.predict(X_surv)))
    return fitted
Code example #2 (score: 0)
def estimate_infcens(Y):
    """Fit Y under two models and report the first location parameter.

    NOTE(review): relies on a module-level ``N`` plus the project's
    ``MultivariateNormal``, ``LogNormal`` and ``MLE`` classes — presumably
    N == number of samples in Y; confirm against the enclosing script.

    Returns:
        dict with keys 'joint' (bivariate-normal fit) and 'lognorm'
        (marginal log-normal fit, censoring assumed non-informative).
    """
    estimates = {}

    # Joint bivariate-normal fit by natural-gradient descent, step size 1.
    theta = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(100000):
        dist = MultivariateNormal(theta)
        score = MLE()
        step = np.mean(score.grad(dist, Y, natural=True).T, axis=1,
                       keepdims=True)
        theta = theta - 1 * step
        if np.linalg.norm(step) < 1e-4:
            break

    print('Jointly Estimated E:', theta[0, 0])
    estimates['joint'] = theta[0, 0]

    # Marginal log-normal fit with a much smaller step size (0.005).
    theta = np.array([[0, 0]] * N).T
    for _ in range(100000):
        dist = LogNormal(theta)
        score = MLE()
        step = np.mean(score.grad(dist, Y, natural=True).T, axis=1,
                       keepdims=True)
        theta = theta - 0.005 * step
        if np.linalg.norm(step) < 1e-4:
            break

    print('Estimate E (assume non-inf):', theta[0, 0])
    estimates['lognorm'] = theta[0, 0]
    return estimates
Code example #3 (score: 0)
def mvnorm_mle(Y, max_iter=1e4, lr=0.5, eps=1e-4):
    """Fit a bivariate normal to Y by natural-gradient MLE.

    Args:
        Y: observations, one row per sample; N is taken from Y.shape[0].
        max_iter: maximum number of gradient steps. Coerced to int so the
            historical float default (1e4) keeps working.
        lr: step size for the natural-gradient update.
        eps: stop once the gradient norm falls below this threshold.

    Returns:
        The fitted parameter array (shape (5, N): two means and the three
        packed covariance entries per sample).
    """
    N = Y.shape[0]
    # Start from zero means and an identity covariance per sample.
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    # BUG FIX: range() requires an int; the default max_iter=1e4 is a float
    # and previously raised TypeError on every call with the default.
    for _ in range(int(max_iter)):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - lr * grad
        if np.linalg.norm(grad) < eps:
            break
    # BUG FIX: the original computed params but fell off the end, returning
    # None and discarding the fit.
    return params
Code example #4 (score: 0)
def test_multivariatenormal(k: int, learner):
    """Smoke-test NGBRegressor with a k-dimensional MultivariateNormal.

    Fits on synthetic data where each output column is a nonlinear
    function of one input column plus Gaussian noise, then exercises the
    predict / pred_dist API. ``k`` and ``learner`` are presumably supplied
    by pytest parametrization/fixtures — confirm against the test module.
    """
    dist = MultivariateNormal(k)

    # Generate some sample data
    N = 500
    X_train = np.random.randn(N, k)
    # One target column per input column: sin, cos, exp of that column,
    # plus unit Gaussian noise.
    y_fns = [np.sin, np.cos, np.exp]
    y_cols = [
        fn(X_train[:, num_col]).reshape(-1, 1) + np.random.randn(N, 1)
        for num_col, fn in enumerate(y_fns[:k])
    ]
    y_train = np.hstack(y_cols)
    X_test = np.random.randn(N, k)

    ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
    ngb.fit(X_train, y_train)
    y_pred = ngb.predict(X_test)
    y_dist = ngb.pred_dist(X_test)

    # Exercise the distribution API; results are only checked for not raising.
    mean = y_dist.mean
    sample = y_dist.rv()
    scipy_list = y_dist.scipy_distribution()
Code example #5 (score: 0)
File: test_score.py  Project: yu-9824/ngboost
    )
    return metric_err


def idfn(dist_score: DistScore):
    """Build a readable pytest id, e.g. "Normal_LogScore", from a pair."""
    distribution, scoring_rule = dist_score
    return f"{distribution.__name__}_{scoring_rule.__name__}"


# (distribution, score) pairs whose metric is tested; the MultivariateNormal
# entries cover dimensions 2 through 4.
TEST_METRIC: List[DistScore] = [
    (Normal, LogScore),
    (Normal, CRPScore),
    (TFixedDfFixedVar, LogScore),
    (Laplace, LogScore),
    (Poisson, LogScore),
] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]
# Fill in the dist, score pair to test the gradient
# Tests all in TEST_METRIC by default
TEST_GRAD: List[DistScore] = TEST_METRIC + [
    (Cauchy, LogScore),
    (T, LogScore),
    (TFixedDf, LogScore),
]


@pytest.mark.parametrize("dist_score_pair", TEST_GRAD, ids=idfn)
def test_dists_grad(dist_score_pair: DistScore):
    """Check each distribution's gradient for the given score.

    NOTE(review): this snippet appears truncated — the body below only
    unpacks the pair; the actual gradient check is not visible here.
    """
    # Set seed as this test involves randomness
    # All errors around 1e-5 mark
    np.random.seed(9)
    dist, score = dist_score_pair
Code example #6 (score: 0)
def Y_join(T, E):
    """Pack survival times and event flags into one structured array.

    Args:
        T: array of log-times; stored exponentiated in the 'Time' field.
        E: array of event indicators (truthy = event observed).

    Returns:
        A structured numpy array of shape (len(T),) with a boolean
        'Event' field and a float64 'Time' field.
    """
    col_event = 'Event'
    col_time = 'Time'
    # BUG FIX: np.bool (an alias for the builtin) was deprecated in
    # NumPy 1.20 and removed in 1.24; use the builtin bool, which numpy
    # maps to np.bool_.
    y = np.empty(dtype=[(col_event, bool), (col_time, np.float64)],
                 shape=T.shape[0])
    y[col_event] = E
    y[col_time] = np.exp(T)
    return y


# NOTE(review): T, E, N, MLE, MultivariateNormal and LogNormal come from
# earlier in the file — presumably N == T.shape[0]; confirm against it.
# The snippet ends mid-statement (the final `if` has no body).
Y = Y_join(T, E)

# Joint bivariate-normal MLE fit via natural-gradient descent, step size 1.
params = np.array([[0, 0, 1, 0, 1]] * N).T
for _ in range(100000):
    D = MultivariateNormal(params)
    S = MLE()
    # Natural gradient of the score, averaged over the sample axis.
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 1 * grad
    if np.linalg.norm(grad) < 1e-4:
        break

print('Jointly Estimated E:', params[0, 0])

# Marginal log-normal fit with a smaller step size (0.1).
params = np.array([[0, 0]] * N).T
for _ in range(100000):
    D = LogNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 0.1 * grad
    if np.linalg.norm(grad) < 1e-4:
Code example #7 (score: 0)
        sigma: (N,2) numpy array containing the variances
        corr: (N,) numpy array the correlation [-1,1] extracted from cov_mat
    """

    sigma = np.sqrt(np.diagonal(cov_mat, axis1=1, axis2=2))
    corr = cov_mat[:, 0, 1] / (sigma[:, 0] * sigma[:, 1])
    return sigma, corr


if __name__ == "__main__":

    SEED = 12345
    np.random.seed(SEED)
    X, Y, true_dist = simulate_data()
    X = X.reshape(-1, 1)
    dist = MultivariateNormal(2)

    data_figure, data_axs = plt.subplots()
    data_axs.plot(X, Y[:, 0], label="Dim 1")
    data_axs.plot(X, Y[:, 1], label="Dim 2")
    data_axs.set_xlabel("X")
    data_axs.set_ylabel("Y")
    data_axs.set_title("Input Data")
    data_axs.legend()
    data_figure.show()

    X_val, Y_val, _ = simulate_data(500)
    X_val = X_val.reshape(-1, 1)
    ngb = NGBRegressor(Dist=dist,
                       verbose=True,
                       n_estimators=2000,