def fixture_learners_data(breast_cancer_data, boston_data, boston_survival_data):
    """
    Returns:
        A list of iterables, each containing a fitted model, the X data,
        and the predictions for that X data.
    """
    models_data = []

    X_class_train, _, Y_class_train, _ = breast_cancer_data
    ngb = NGBClassifier(verbose=False, n_estimators=10)
    ngb.fit(X_class_train, Y_class_train)
    models_data.append((ngb, X_class_train, ngb.predict(X_class_train)))

    X_reg_train, _, Y_reg_train, _ = boston_data
    ngb = NGBRegressor(verbose=False, n_estimators=10)
    ngb.fit(X_reg_train, Y_reg_train)
    models_data.append((ngb, X_reg_train, ngb.predict(X_reg_train)))

    X_surv_train, _, T_surv_train, E_surv_train, _ = boston_survival_data
    ngb = NGBSurvival(verbose=False, n_estimators=10)
    ngb.fit(X_surv_train, T_surv_train, E_surv_train)
    models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))

    ngb = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
    ngb.fit(X_surv_train, np.vstack([T_surv_train, E_surv_train]).T)
    models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))

    return models_data
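# A hedged usage sketch (assumption: the fixture above is registered with
# pytest under the name "learners_data", e.g. via @pytest.fixture(name=...)).
# A consuming test can unpack the (model, X, predictions) triples directly;
# the assertion here is illustrative, not part of the original suite.
def test_predictions_align_with_inputs(learners_data):
    for model, X, preds in learners_data:
        # each fitted model should emit one prediction per row of X
        assert len(preds) == X.shape[0]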
def estimate_infcens(Y):
    res = {}
    N = Y.shape[0]  # derive the sample size from Y rather than a global

    # Jointly estimate the event and censoring distributions with a
    # bivariate normal (5 natural parameters per observation).
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(100000):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 1 * grad
        if np.linalg.norm(grad) < 1e-4:
            break
    print('Jointly Estimated E:', params[0, 0])
    res['joint'] = params[0, 0]

    # Re-estimate under a lognormal, which assumes non-informative censoring.
    params = np.array([[0, 0]] * N).T
    for _ in range(100000):
        D = LogNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 0.005 * grad
        if np.linalg.norm(grad) < 1e-4:
            break
    print('Estimated E (assuming non-informative censoring):', params[0, 0])
    res['lognorm'] = params[0, 0]

    return res
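# A minimal driver sketch for estimate_infcens (assumptions: T and C are
# latent log event/censoring times, and a Y_join-style helper, as defined
# further below, packs the observed times and event indicators into the
# structured (Event, Time) array these scores expect; the names here are
# illustrative, not from the original experiments).
def demo_estimate_infcens(n=1000, seed=0):
    rng = np.random.default_rng(seed)
    T = rng.normal(size=n)             # latent log event times
    C = rng.normal(loc=0.5, size=n)    # latent log censoring times
    E = T <= C                         # event observed before censoring
    Y = Y_join(np.minimum(T, C), E)    # structured (Event, Time) array
    return estimate_infcens(Y)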
def mvnorm_mle(Y, max_iter=10000, lr=0.5, eps=1e-4):
    N = Y.shape[0]
    # Natural-gradient descent on the bivariate normal's 5 natural
    # parameters (per observation), stopping once the gradient is small.
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(int(max_iter)):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - lr * grad
        if np.linalg.norm(grad) < eps:
            break
    return params
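# Illustrative call, assuming mvnorm_mle returns the fitted natural
# parameters as above and Y is the structured censored array used
# throughout these experiments (hypothetical usage, not from the source):
# fitted = mvnorm_mle(Y, max_iter=10000, lr=0.5)
# print('Jointly estimated E:', fitted[0, 0])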
def test_multivariatenormal(k: int, learner):
    dist = MultivariateNormal(k)

    # Generate some sample data
    N = 500
    X_train = np.random.randn(N, k)
    y_fns = [np.sin, np.cos, np.exp]
    y_cols = [
        fn(X_train[:, num_col]).reshape(-1, 1) + np.random.randn(N, 1)
        for num_col, fn in enumerate(y_fns[:k])
    ]
    y_train = np.hstack(y_cols)
    X_test = np.random.randn(N, k)

    ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
    ngb.fit(X_train, y_train)
    y_pred = ngb.predict(X_test)
    y_dist = ngb.pred_dist(X_test)
    mean = y_dist.mean
    sample = y_dist.rv()
    scipy_list = y_dist.scipy_distribution()
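    # Hedged follow-up sketch: shape checks that could round out the test
    # above (illustrative assertions, not the original suite's):
    # assert y_pred.shape == (N, k)
    # assert np.asarray(mean).shape == (N, k)
    # assert len(scipy_list) == N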
    )
    return metric_err


def idfn(dist_score: DistScore):
    dist, score = dist_score
    return dist.__name__ + "_" + score.__name__


TEST_METRIC: List[DistScore] = [
    (Normal, LogScore),
    (Normal, CRPScore),
    (TFixedDfFixedVar, LogScore),
    (Laplace, LogScore),
    (Poisson, LogScore),
] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]

# Fill in the dist, score pair to test the gradient.
# Tests all in TEST_METRIC by default.
TEST_GRAD: List[DistScore] = TEST_METRIC + [
    (Cauchy, LogScore),
    (T, LogScore),
    (TFixedDf, LogScore),
]


@pytest.mark.parametrize("dist_score_pair", TEST_GRAD, ids=idfn)
def test_dists_grad(dist_score_pair: DistScore):
    # Set seed as this test involves randomness;
    # all errors are around the 1e-5 mark.
    np.random.seed(9)
    dist, score = dist_score_pair
def Y_join(T, E):
    col_event = 'Event'
    col_time = 'Time'
    # Structured array pairing each event indicator with its
    # (exponentiated) observed time. Note: np.bool was removed from
    # NumPy; the builtin bool is the correct dtype here.
    y = np.empty(dtype=[(col_event, bool), (col_time, np.float64)],
                 shape=T.shape[0])
    y[col_event] = E
    y[col_time] = np.exp(T)
    return y


Y = Y_join(T, E)

params = np.array([[0, 0, 1, 0, 1]] * N).T
for _ in range(100000):
    D = MultivariateNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 1 * grad
    if np.linalg.norm(grad) < 1e-4:
        break
print('Jointly Estimated E:', params[0, 0])

params = np.array([[0, 0]] * N).T
for _ in range(100000):
    D = LogNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 0.1 * grad
    if np.linalg.norm(grad) < 1e-4:
        sigma: (N, 2) numpy array containing the standard deviations
        corr: (N,) numpy array with the correlation in [-1, 1] extracted
            from cov_mat
    """
    sigma = np.sqrt(np.diagonal(cov_mat, axis1=1, axis2=2))
    corr = cov_mat[:, 0, 1] / (sigma[:, 0] * sigma[:, 1])
    return sigma, corr


if __name__ == "__main__":
    SEED = 12345
    np.random.seed(SEED)

    X, Y, true_dist = simulate_data()
    X = X.reshape(-1, 1)
    dist = MultivariateNormal(2)

    data_figure, data_axs = plt.subplots()
    data_axs.plot(X, Y[:, 0], label="Dim 1")
    data_axs.plot(X, Y[:, 1], label="Dim 2")
    data_axs.set_xlabel("X")
    data_axs.set_ylabel("Y")
    data_axs.set_title("Input Data")
    data_axs.legend()
    data_figure.show()

    X_val, Y_val, _ = simulate_data(500)
    X_val = X_val.reshape(-1, 1)

    ngb = NGBRegressor(Dist=dist, verbose=True, n_estimators=2000,