Python hrt Examples

Programming Language: Python

Namespace/Package Name: pyhrt.hrt

Method/Function: hrt

Examples at hotexamples.com: 5

Python hrt - 5 examples found. These are the top-rated real-world Python examples of pyhrt.hrt.hrt, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def run_hrt(
    feat_idx,
    X_drug,
    y_drug,
    elastic_model,
    features,
    ccle_features,
    pca_components=100,
    discrete_threshold=10,
    nbootstraps=100,
    nperms=5000,
    verbose=False,
):
    """Return the HRT p-value for the feature named ``ccle_features[feat_idx]``.

    The target column of ``X_drug`` is located through ``features.get_loc``.
    All other columns are reduced with PCA, the target column is put back in
    front as column 0, and a conditional sampler is calibrated on that matrix.
    The sampler is then handed to ``hrt`` together with a mean-squared-error
    test statistic built from ``elastic_model``.

    Args:
        feat_idx: Position of the target name inside ``ccle_features``.
        X_drug: 2-D feature matrix (indexed as ``X_drug[:, j]``).
        y_drug: Responses compared against ``elastic_model.predict(...)``.
        elastic_model: Fitted model exposing ``predict``.
        features: Object exposing ``get_loc(name)`` (presumably a pandas
            Index; verify against the caller).
        ccle_features: Sequence of candidate feature names.
        pca_components: ``n_components`` passed to ``PCA``.
        discrete_threshold: At most this many unique values selects the
            discrete calibrator, otherwise the continuous one is used.
        nbootstraps: Forwarded to the calibrator.
        nperms: Forwarded to ``hrt``.
        verbose: Print progress messages when true.

    Returns:
        The ``"p_value"`` entry of the ``hrt`` result.
    """
    target_name = ccle_features[feat_idx]
    column = features.get_loc(target_name)
    n_distinct = np.unique(X_drug[:, column]).size
    if verbose:
        message = "{} is feature number {} with {} unique values".format(
            target_name, column, n_distinct
        )
        print(message)

    # Every column except the target goes through PCA.
    keep = np.ones(X_drug.shape[1], dtype=bool)
    keep[column] = False
    others = X_drug[:, keep]
    from sklearn.decomposition import PCA

    reduced = PCA(n_components=pca_components).fit_transform(others)

    # Column 0 of the calibration matrix is the untouched target feature.
    target_column = X_drug[:, column : column + 1]
    design = np.concatenate([target_column, reduced], axis=1)

    use_discrete = n_distinct <= discrete_threshold
    if verbose:
        if use_discrete:
            print("Using discrete conditional")
        else:
            print("Using continuous conditional")
    calibrate = calibrate_discrete if use_discrete else calibrate_continuous
    sampler = calibrate(design, 0, nbootstraps=nbootstraps)["sampler"]

    def squared_error(X_test):
        # Mean squared error of the fitted model on the supplied design.
        residual = y_drug - elastic_model.predict(X_test)
        return (residual ** 2).mean()

    outcome = hrt(
        column,
        squared_error,
        X_drug,
        nperms=nperms,
        conditional=sampler,
        lower=sampler.quantiles[0],
        upper=sampler.quantiles[1],
    )
    return outcome["p_value"]

Example #2

Show file

def run_hrt(
    target_feature,
    X,
    y,
    features,
    model,
    pca_components=100,
    discrete_threshold=10,
    nbootstraps=100,
    nperms=5000,
    verbose=False,
):
    """Return the HRT p-value for the feature named ``target_feature``.

    A conditional sampler is calibrated on a matrix whose column 0 is the
    target feature and whose remaining columns are the other features of
    ``X`` (PCA-compressed unless ``pca_components`` is ``None``). The sampler
    is then passed to ``hrt`` with a mean-squared-error test statistic built
    from ``model``.

    Args:
        target_feature: Name looked up with ``features.get_loc``.
        X: 2-D feature matrix (indexed as ``X[:, j]``).
        y: Responses compared against ``model.predict(...)``.
        features: Object exposing ``get_loc(name)`` (presumably a pandas
            Index; verify against the caller).
        model: Fitted model exposing ``predict``.
        pca_components: ``n_components`` for ``PCA``; ``None`` skips PCA and
            calibrates on the raw remaining columns.
        discrete_threshold: At most this many unique values selects the
            discrete calibrator, otherwise the continuous one is used.
        nbootstraps: Forwarded to the calibrator.
        nperms: Forwarded to ``hrt``.
        verbose: Print which calibrator is used.

    Returns:
        The ``"p_value"`` entry of the ``hrt`` result.
    """
    feature_idx = features.get_loc(target_feature)
    fmask = np.ones(X.shape[1], dtype=bool)
    fmask[feature_idx] = False
    X_transform = X[:, fmask]
    if pca_components is not None:
        from sklearn.decomposition import PCA

        pca = PCA(n_components=pca_components)
        X_transform = pca.fit_transform(X_transform)

    # The calibrators below are always asked to model column 0, so the target
    # column must be prepended whether or not PCA ran. Previously this only
    # happened inside the PCA branch, so with pca_components=None the sampler
    # was fit on whichever other feature happened to land in column 0.
    X_transform = np.concatenate(
        [X[:, feature_idx:feature_idx + 1], X_transform], axis=1)

    nunique = np.unique(X[:, feature_idx]).shape[0]
    if nunique <= discrete_threshold:
        if verbose:
            print("Using discrete conditional")
        results = calibrate_discrete(X_transform, 0, nbootstraps=nbootstraps)
    else:
        if verbose:
            print("Using continuous conditional")
        results = calibrate_continuous(X_transform, 0, nbootstraps=nbootstraps)
    conditional = results["sampler"]

    # Test statistic: mean squared error of the fitted model on a design.
    tstat = lambda X_test: ((y - model.predict(X_test))**2).mean()
    p_value = hrt(
        feature_idx,
        tstat,
        X,
        nperms=nperms,
        conditional=conditional,
        lower=conditional.quantiles[0],
        upper=conditional.quantiles[1],
    )["p_value"]
    return p_value

Example #3

Show file

def main():
    """Run the robust HRT sweep for one feature of one simulated trial.

    Command line: ``<trial> <feature> [--reset-models]``. Data and model
    checkpoints are read from ``data/<trial>/``. With ``--reset-models`` the
    linear and nonlinear models are refit and saved; otherwise they are
    loaded. For each model the p-value for ``feature`` is taken from the
    aggregate table, then from the per-feature ``.npy`` file if one exists,
    and only recomputed with ``hrt`` when it still contains NaN.
    """
    # Simulation settings. N and S are only echoed in log messages here;
    # P additionally sizes the p-value tables.
    N = 500
    P = 500
    S = 40
    nperms = 5000
    nbootstraps = 100

    trial = int(sys.argv[1])
    feature = int(sys.argv[2])
    reset_models = "--reset-models" in sys.argv[3:]

    # Symmetric bounds around 50, one pair per interval half-width.
    intervals = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45])
    lower = 50 - intervals
    upper = 50 + intervals

    x_path = "data/{}/X.csv".format(trial)
    y_path = "data/{}/Y.csv".format(trial)
    truth_path = "data/{}/truth.csv".format(trial)
    linear_ckpt = "data/{}/cv_linear.pt".format(trial)
    nonlinear_ckpt = "data/{}/cv_nonlinear.pt".format(trial)
    linear_table_path = "data/{}/sweep_robust_linear_p_values".format(trial)
    nonlinear_table_path = "data/{}/sweep_robust_nonlinear_p_values".format(
        trial)
    linear_feature_path = "data/{}/sweep_robust_linear_p_value_{}".format(
        trial, feature)
    nonlinear_feature_path = (
        "data/{}/sweep_robust_nonlinear_p_value_{}".format(trial, feature))

    X = np.loadtxt(x_path, delimiter=",")
    y = np.loadtxt(y_path, delimiter=",")
    # The truth file is still read, but its contents are not used below.
    np.loadtxt(truth_path, delimiter=",")

    if not reset_models:
        linear_model = torch.load(linear_ckpt)
        nonlinear_model = torch.load(nonlinear_ckpt)
    else:
        print("Fitting models with N={} P={} S={} nperms={}".format(
            N, P, S, nperms))
        sys.stdout.flush()
        linear_model = fit_cv(X, y, verbose=False, model_type="linear")
        nonlinear_model = fit_cv(X, y, verbose=False, model_type="nonlinear")
        torch.save(linear_model, linear_ckpt)
        torch.save(nonlinear_model, nonlinear_ckpt)

    linear_p_values = load_or_create(linear_table_path, P, intervals)
    nonlinear_p_values = load_or_create(nonlinear_table_path, P, intervals)

    print(
        "Testing with N={} P={} S={} nperms={} nbootstraps={} interval=[{},{}]"
        .format(N, P, S, nperms, nbootstraps, lower, upper))

    def mse_statistic(fitted):
        # Build the test statistic: mean squared error of `fitted` on a design.
        def statistic(X_test):
            return ((y - fitted.predict(X_test))**2).mean()

        return statistic

    tstat_linear = mse_statistic(linear_model)
    tstat_nonlinear = mse_statistic(nonlinear_model)

    print("Feature: {}".format(feature))

    # Filled in by the linear run (when it executes) and reused afterwards.
    conditional = None

    # --- linear model ---
    linear_p_value = linear_p_values[feature]
    linear_cache = linear_feature_path + ".npy"
    if np.any(np.isnan(linear_p_value)) and os.path.exists(linear_cache):
        linear_p_value = np.load(linear_cache)
    if np.any(np.isnan(linear_p_value)):
        print("Running linear robust CVR test")
        linear_results = hrt(
            feature,
            tstat_linear,
            X,
            nperms=nperms,
            nbootstraps=nbootstraps,
            conditional=conditional,
            lower=lower,
            upper=upper,
        )
        linear_p_value = linear_results["p_value"]
        conditional = linear_results["sampler"]
        np.save(linear_feature_path, linear_p_value)

    # --- nonlinear model ---
    nonlinear_p_value = nonlinear_p_values[feature]
    nonlinear_cache = nonlinear_feature_path + ".npy"
    if np.any(np.isnan(nonlinear_p_value)) and os.path.exists(nonlinear_cache):
        nonlinear_p_value = np.load(nonlinear_cache)
    if np.any(np.isnan(nonlinear_p_value)):
        print("Running nonlinear robust CVR test")
        nonlinear_results = hrt(
            feature,
            tstat_nonlinear,
            X,
            nperms=nperms,
            nbootstraps=nbootstraps,
            conditional=conditional,
            lower=lower,
            upper=upper,
        )
        nonlinear_p_value = nonlinear_results["p_value"]
        np.save(nonlinear_feature_path, nonlinear_p_value)

    print(
        "p-values Robust CVR (linear): {}\nRobust CVR (nonlinear): {}".format(
            pretty_str(linear_p_value), pretty_str(nonlinear_p_value)))

    # NOTE: aggregate TPR/FDR reporting and the trial-0 p-value plots are
    # disabled in this script; only the per-feature p-values are produced.

    print("Done!")
    sys.stdout.flush()

Example #4

Show file

def run(trial, feature, reset=False):
    """Compute and save a p-value for ``feature`` under several models.

    The first model is tested with a full ``hrt`` call that also stores its
    null samples (``save_nulls=True``). Every remaining model reuses those
    stored samples and quantiles to compute its own p-value without
    resampling. Each p-value is written to
    ``data/<trial>/<prefix>_<feature>.npy``.

    Args:
        trial: Trial identifier; used for data paths and the random seed.
        feature: Column index of the feature under test.
        reset: Forwarded to ``get_model``.
    """
    N = 500  # total number of samples
    P = 500  # number of features
    S = 40  # number of signal features
    nperms = 5000
    nfolds = 5

    X, y, _ = load_or_create_dataset(trial, N, P, S)

    np.random.seed(trial * P + feature)

    # (display name, fitting function, file prefix) for each model.
    # An 'Extra Trees' entry (fit_extratrees, 'xtrees') is disabled.
    model_specs = [
        ("Partial Least Squares", fit_pls, "pls"),
        ("Lasso", fit_lasso_cv, "lasso"),
        ("Elastic Net", fit_elastic_net_cv, "enet"),
        ("Bayesian Ridge", fit_bridge, "bridge"),
        ("Polynomial Kernel Ridge", fit_kridge, "kridge"),
        ("RBF Support Vector", fit_svr, "svr"),
        ("Random Forest", fit_forest, "rf"),
    ]
    infos = [
        ModelInfo(trial, name, fitter, prefix)
        for name, fitter, prefix in model_specs
    ]

    # The folds come from the first model and are shared by all models.
    folds = get_model(infos[0], X, y, create_folds(X, nfolds), reset).folds
    models = [get_model(info, X, y, folds, reset) for info in infos]

    def mse_statistic(fitted):
        # Test statistic: mean squared error of `fitted` on a design matrix.
        def statistic(X_target):
            return ((y - fitted.predict(X_target))**2).mean()

        return statistic

    def save_p_value(prefix, value):
        np.save("data/{}/{}_{}.npy".format(trial, prefix, feature), value)

    # Load the conditional model for this feature
    conditional = get_conditional(trial, feature)

    # Full test on the first model, keeping the null samples for reuse.
    first_info, first_model = infos[0], models[0]
    print("Running CVRT for {}".format(first_info.name))
    results = hrt(
        feature,
        mse_statistic(first_model),
        X,
        nperms=nperms,
        conditional=conditional,
        lower=conditional.quantiles[0],
        upper=conditional.quantiles[1],
        save_nulls=True,
    )
    p_value = results["p_value"]
    print("p={}".format(p_value))
    save_p_value(first_info.prefix, p_value)

    # Values cached by the full run. t_true is recomputed per model below.
    t_true = results["t_stat"]
    X_nulls = results["samples_null"]
    quantile_nulls = results["quantiles_null"]

    # Scratch copy of X whose target column is overwritten on every draw.
    X_null = np.copy(X)
    for info, model in zip(infos[1:], models[1:]):
        print("Running cached CVRT for {}".format(info.name))
        t_weights = np.full(nperms, np.nan)
        t_null = np.full(nperms, np.nan)
        tstat = mse_statistic(model)
        t_true = tstat(X)
        for perm in range(nperms):
            if perm % 500 == 0:
                print("Trial {}".format(perm))

            # Test statistic with the target column replaced by a null draw.
            X_null[:, feature] = X_nulls[perm]
            t_null[perm] = tstat(X_null)

            # Column 1 of the cached quantiles is used when the null
            # statistic does not exceed the observed one, column 0 otherwise.
            quantile_column = 1 if t_null[perm] <= t_true else 0
            t_weights[perm] = quantile_nulls[perm, quantile_column]

        not_above = t_null <= t_true
        p_value = t_weights[not_above].sum() / t_weights.sum()
        print("p={}".format(p_value))
        save_p_value(info.prefix, p_value)

Example #5

Show file

def run(trial, feature, reset, cv, robust):
    """Test one feature with the permutation test and linear/nonlinear HRTs.

    Models are loaded from ``data/<trial>/`` unless ``reset`` is true or the
    linear checkpoint is missing, in which case they are fit and saved. A
    test is only run when its entry in the aggregate p-value table is NaN and
    no per-feature ``.npy`` file exists yet. Results are saved per feature.

    Args:
        trial: Trial identifier used in all data paths; trial 0 also writes
            model-fit scatter plots under ``plots/``.
        feature: Column index of the feature under test.
        reset: Refit the models even if a checkpoint exists.
        cv: Use cross-validated models on all data instead of a holdout
            split of the last ``T`` rows.
        robust: Use 100 bootstraps and save the returned bounds; also skips
            the permutation test.
    """
    N = 500  # total number of samples
    P = 500  # number of features
    S = 40  # number of signal features
    T = 100  # test sample size
    nperms = 5000

    cv_prefix = "cv_" if cv else ""
    model_prefix = cv_prefix
    p_prefix = cv_prefix + ("robust_" if robust else "")
    nbootstraps = 100 if robust else 1

    # Tags interpolated into the result log lines.
    robust_tag = "robust" if robust else ""
    cv_tag = "cv" if cv else ""

    linear_ckpt = "data/{}/{}linear.pt".format(trial, model_prefix)
    nonlinear_ckpt = "data/{}/{}nonlinear.pt".format(trial, model_prefix)
    perm_table_path = "data/{}/{}perm_p_values".format(trial, p_prefix)
    linear_table_path = "data/{}/{}linear_p_values".format(trial, p_prefix)
    nonlinear_table_path = "data/{}/{}nonlinear_p_values".format(
        trial, p_prefix)
    perm_feature_path = "data/{}/{}perm_p_values_{}".format(
        trial, p_prefix, feature)
    linear_feature_path = "data/{}/{}linear_p_values_{}".format(
        trial, p_prefix, feature)
    nonlinear_feature_path = "data/{}/{}nonlinear_p_values_{}".format(
        trial, p_prefix, feature)
    linear_bounds_path = "data/{}/{}linear_bounds_{}".format(
        trial, p_prefix, feature)
    nonlinear_bounds_path = "data/{}/{}nonlinear_bounds_{}".format(
        trial, p_prefix, feature)
    conditional_path = "data/{}/conditional_{}{}.pt".format(
        trial, p_prefix, feature)

    X, y, _ = load_or_create_dataset(trial, N, P, S)

    if reset or not os.path.exists(linear_ckpt):
        # Train the models and checkpoint them.
        print("Fitting models with N={} P={} S={} T={} nperms={}".format(
            N, P, S, T, nperms))
        sys.stdout.flush()
        if cv:
            print("Using CV models")
            fit, X_fit, y_fit = fit_cv, X, y
        else:
            print("Using holdout models")
            fit, X_fit, y_fit = fit_nn, X[:-T], y[:-T]
        linear_model = fit(X_fit, y_fit, verbose=False, model_type="linear")
        nonlinear_model = fit(X_fit,
                              y_fit,
                              verbose=False,
                              model_type="nonlinear")
        torch.save(linear_model, linear_ckpt)
        torch.save(nonlinear_model, nonlinear_ckpt)
    else:
        linear_model = torch.load(linear_ckpt)
        nonlinear_model = torch.load(nonlinear_ckpt)

    # Aggregate p-value tables; the permutation table is only used when the
    # run is not robust.
    perm_p_values = None if robust else load_or_create(perm_table_path, P)
    linear_p_values = load_or_create(linear_table_path, P)
    nonlinear_p_values = load_or_create(nonlinear_table_path, P)

    # With CV everything is used for both roles; otherwise the last T rows
    # are held out for testing.
    if cv:
        y_train, y_test = y, y
        X_train, X_test = X, None
    else:
        y_train, y_test = y[:-T], y[-T:]
        X_train, X_test = X[:-T], X[-T:]

    def mse_statistic(fitted):
        # Test statistic: mean squared error of `fitted` against y_test.
        def statistic(X_target):
            return ((y_test - fitted.predict(X_target))**2).mean()

        return statistic

    tstat_linear = mse_statistic(linear_model)
    tstat_nonlinear = mse_statistic(nonlinear_model)

    if trial == 0:
        import matplotlib

        matplotlib.use("Agg")
        import seaborn as sns

        plot_suffix = "-cv" if cv else ""
        fit_plots = (
            (nonlinear_model, "plots/liang-nonlinear-fit{}.pdf"),
            (linear_model, "plots/liang-linear-fit{}.pdf"),
        )
        with sns.axes_style("white", {"legend.frameon": True}):
            for fitted, path_template in fit_plots:
                plt.rc("font", weight="bold")
                plt.rc("grid", lw=3)
                plt.rc("lines", lw=2)
                plt.rc("axes", lw=2)
                plt.scatter(y_train, fitted.predict(X_train))
                # Dashed identity line spanning the range of y.
                diagonal = [y.min(), y.max()]
                plt.plot(diagonal, diagonal, color="red", ls="--")
                plt.xlabel("Truth", fontsize=18, weight="bold")
                plt.ylabel("Predicted", fontsize=18, weight="bold")
                plt.savefig(
                    path_template.format(plot_suffix),
                    bbox_inches="tight",
                )
                plt.close()

    # Set by the linear run (when it executes) and passed to the nonlinear
    # run.
    conditional = None
    lower = None
    upper = None
    perm_folds = nonlinear_model.folds if cv else None
    print("Feature: {}".format(feature))

    if not robust:
        print("Running permutation test")
        perm_missing = np.isnan(perm_p_values[feature]) and not os.path.exists(
            perm_feature_path + ".npy")
        if perm_missing:
            permer = PermutationConditional(X if cv else X[-T:], feature,
                                            perm_folds)
            perm_results = hrt(
                feature,
                tstat_nonlinear,
                X_train,
                X_test=X_test,
                nperms=nperms,
                conditional=permer,
            )
            perm_p_value = perm_results["p_value"]
            np.save(perm_feature_path, perm_p_value)
            print("Trial {} feature {} {} {} permutation p={}".format(
                trial,
                feature,
                robust_tag,
                cv_tag,
                perm_p_value,
            ))

    print("Running linear HRT")
    linear_missing = np.isnan(linear_p_values[feature]) and not os.path.exists(
        linear_feature_path + ".npy")
    if linear_missing:
        linear_results = hrt(
            feature,
            tstat_linear,
            X_train,
            X_test=X_test,
            nperms=nperms,
            nbootstraps=nbootstraps,
            conditional=conditional,
        )
        linear_p_value = linear_results["p_value"]
        conditional = linear_results["sampler"]
        np.save(linear_feature_path, linear_p_value)
        print("Trial {} feature {} {} {} linear hrt p={}".format(
            trial,
            feature,
            robust_tag,
            cv_tag,
            linear_p_value,
        ))
        if robust:
            lower = linear_results["lower"]
            upper = linear_results["upper"]
            np.save(linear_bounds_path, np.concatenate([lower, upper]))

    print("Running nonlinear HRT")
    nonlinear_missing = np.isnan(
        nonlinear_p_values[feature]) and not os.path.exists(
            nonlinear_feature_path + ".npy")
    if nonlinear_missing:
        nonlinear_results = hrt(
            feature,
            tstat_nonlinear,
            X_train,
            X_test=X_test,
            nperms=nperms,
            nbootstraps=nbootstraps,
            conditional=conditional,
            lower=lower,
            upper=upper,
        )
        nonlinear_p_value = nonlinear_results["p_value"]
        np.save(nonlinear_feature_path, nonlinear_p_value)
        torch.save(nonlinear_results["sampler"], conditional_path)
        print("Trial {} feature {} {} {} nonlinear hrt p={}".format(
            trial,
            feature,
            robust_tag,
            cv_tag,
            nonlinear_p_value,
        ))
        if robust:
            lower = nonlinear_results["lower"]
            upper = nonlinear_results["upper"]
            np.save(nonlinear_bounds_path, np.concatenate([lower, upper]))

    print("")
    print("Done!")
    sys.stdout.flush()