def test_estimate_loco_offsets_logistic_ridge_no_intercept_with_cov(spark):
    """Check ``estimate_loco_offsets`` against the explicit logistic ridge pipeline.

    Covariates are supplied and ``add_intercept=False`` is used on both paths.
    The result of the one-call ``estimate_loco_offsets`` helper must be
    numerically close (``np.allclose``) to running ``RidgeReduction`` followed
    by ``LogisticRidgeRegression.fit_transform_loco`` step by step.

    Args:
        spark: Object exposing ``read.parquet`` (presumably the SparkSession
            test fixture -- confirm against the test suite's conftest).
    """
    # Covariates and binary labels are indexed by sample_id; the index is cast
    # to str after loading.
    covdf = pd.read_csv(f'{logistic_ridge_data_root}/cov.csv').set_index('sample_id')
    covdf.index = covdf.index.astype(str, copy=False)
    labeldf = pd.read_csv(f'{logistic_ridge_data_root}/binary_phenotypes.csv').set_index('sample_id')
    labeldf.index = labeldf.index.astype(str, copy=False)
    indexdf = spark.read.parquet(f'{ridge_data_root}/groupedIDs.snappy.parquet')
    blockdf = spark.read.parquet(f'{ridge_data_root}/blockedGT.snappy.parquet')

    group2ids = __get_sample_blocks(indexdf)

    # Path 1: the convenience wrapper under test.
    y_hat_df = estimate_loco_offsets(blockdf,
                                     labeldf,
                                     group2ids,
                                     covdf,
                                     add_intercept=False,
                                     reduction_alphas=alphas,
                                     regression_alphas=alphas)

    # Path 2: the same computation spelled out stage by stage.
    stack0 = RidgeReduction(blockdf, labeldf, group2ids, covdf, add_intercept=False, alphas=alphas)
    stack0.fit_transform()
    regressor = LogisticRidgeRegression.from_ridge_reduction(stack0, alphas)
    yhatdf = regressor.fit_transform_loco()

    assert np.allclose(y_hat_df, yhatdf)
# Example #2
# 0
def test_estimate_loco_offsets_ridge(spark):
    """Check ``estimate_loco_offsets`` against the explicit ridge pipeline.

    No covariates are passed and ``add_intercept=False`` is used on both
    paths. The output of the one-call ``estimate_loco_offsets`` helper must be
    numerically close (``np.allclose``) to running ``RidgeReduction`` followed
    by ``RidgeRegression.fit_transform_loco`` step by step.

    Args:
        spark: Object exposing ``read.parquet`` (presumably the SparkSession
            test fixture -- confirm against the test suite's conftest).
    """
    # Labels: sample_id is read as a string and then used as the index.
    phenotypes = pd.read_csv(f'{ridge_data_root}/pts.csv', dtype={'sample_id': 'str'})
    phenotypes = phenotypes.set_index('sample_id')

    grouped_ids = spark.read.parquet(f'{ridge_data_root}/groupedIDs.snappy.parquet')
    genotype_blocks = spark.read.parquet(f'{ridge_data_root}/blockedGT.snappy.parquet')
    sample_blocks = __get_sample_blocks(grouped_ids)

    # Path 1: the convenience wrapper under test.
    wrapper_offsets = estimate_loco_offsets(
        genotype_blocks,
        phenotypes,
        sample_blocks,
        add_intercept=False,
        reduction_alphas=alphas,
        regression_alphas=alphas,
    )

    # Path 2: the same computation spelled out stage by stage.
    reduction = RidgeReduction(
        genotype_blocks,
        phenotypes,
        sample_blocks,
        add_intercept=False,
        alphas=alphas,
    )
    reduction.fit_transform()
    manual_offsets = RidgeRegression.from_ridge_reduction(reduction, alphas).fit_transform_loco()

    assert np.allclose(wrapper_offsets, manual_offsets)