def test_genotypes_exact():
    """
    Test whether genotype knockoffs with true HMM are accurate.

    Training data are sampled from the HMM returned by ``generate_HMM`` and
    written out for fastPHASE. The fitted parameters are loaded twice: in
    compact form to build the genotype knockoff generator, and in expanded
    form to build a model from which the test data are sampled. Because the
    test data come from the fitted model itself, the knockoff generator uses
    the true model of the data, and the result is checked with
    ``verify_exchangeability`` at a tolerance of 1e-3.
    """
    import os

    p = 10
    K = 3
    M = 3
    n_train = 1000
    n_test = 100000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)

    fastphase = "fastphase"  # Name of fastPhase executable

    # All intermediate files live in a private directory that is removed on
    # exit, even if fastPHASE or an assertion fails. This avoids leaking the
    # open descriptors returned by mkstemp() and the fitted-parameter files.
    with tempfile.TemporaryDirectory() as tmp_dir:
        Xfp_file = os.path.join(tmp_dir, "X.inp")
        out_path = os.path.join(tmp_dir, "fastphase_out")
        fp.writeXtoInp(X, Xfp_file)
        fp.runFastPhase(Xfp_file, out_path, fastphase=fastphase, K=5, numit=25)

        r_file = out_path + "_rhat.txt"
        alpha_file = out_path + "_alphahat.txt"
        theta_file = out_path + "_thetahat.txt"
        char_file = out_path + "_origchars"

        # Everything that reads the fitted files must happen before the
        # directory is cleaned up.
        hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
        hmm_hat = fp.loadHMM(r_file,
                             alpha_file,
                             theta_file,
                             char_file,
                             compact=False)

    knockoffs = knockoffGenotypes(hmm["r"],
                                  hmm["alpha"],
                                  hmm["theta"],
                                  seed=123)

    # Sample fresh data from the fitted model so that the knockoff generator
    # is built from the exact parameters that generated the data.
    modelX_hat = models.HMM(hmm_hat["pInit"], hmm_hat["Q"], hmm_hat["pEmit"])
    X_new = modelX_hat.sample(n_test)
    Xk_new = knockoffs.sample(X_new)
    verify_exchangeability(X_new, Xk_new, tolerance=1e-3)
def test_haplotypes_hmm():
    """
    Test whether specialized haplotype knockoff algorithm agrees with the
    generic HMM knockoff algorithm.

    A model is fitted with fastPHASE on phased training data. The fitted
    parameters are loaded in compact form (for ``knockoffHaplotypes``) and in
    expanded, phased form (for ``knockoffHMM``). Both generators are given the
    same groups and the same seed, and their knockoffs for the training data
    are required to be identical.
    """
    import os

    p = 10
    K = 5
    M = 2
    n_train = 1000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)

    # Consecutive variables are grouped in blocks of three:
    # [0, 0, 0, 1, 1, 1, 2, 2, 2, 3] for p = 10.
    groups = np.repeat(np.arange(p), 3)[:p]

    fastphase = "fastphase"  # Name of fastPhase executable

    # All intermediate files live in a private directory that is removed on
    # exit, which avoids leaking the descriptors returned by mkstemp() and
    # the files written by fastPHASE.
    with tempfile.TemporaryDirectory() as tmp_dir:
        Xfp_file = os.path.join(tmp_dir, "X.inp")
        out_path = os.path.join(tmp_dir, "fastphase_out")
        fp.writeXtoInp(X, Xfp_file, phased=True)
        fp.runFastPhase(Xfp_file,
                        out_path,
                        fastphase=fastphase,
                        phased=True,
                        K=5,
                        numit=25)

        r_file = out_path + "_rhat.txt"
        alpha_file = out_path + "_alphahat.txt"
        theta_file = out_path + "_thetahat.txt"
        char_file = out_path + "_origchars"

        # The fitted files must be read before the directory is cleaned up.
        hmm_compact = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
        hmm = fp.loadHMM(r_file,
                         alpha_file,
                         theta_file,
                         char_file,
                         compact=False,
                         phased=True)

    # Same groups and same seed for both generators, so any difference in the
    # output comes from the algorithms themselves.
    knockoffs = knockoffHMM(hmm["pInit"],
                            hmm["Q"],
                            hmm["pEmit"],
                            groups=groups,
                            seed=123)
    knockoffs_hap = knockoffHaplotypes(hmm_compact["r"],
                                       hmm_compact["alpha"],
                                       hmm_compact["theta"],
                                       groups=groups,
                                       seed=123)

    Xk = knockoffs.sample(X)
    Xk_compact = knockoffs_hap.sample(X)
    assert np.array_equal(
        Xk, Xk_compact), "Knockoffs with trivial groups do not match"
def test_haplotypes_fastphase():
    """
    Test whether haplotype knockoffs with HMM fitted by fastPHASE are accurate.

    Phased data are sampled from the HMM returned by ``generate_HMM``, a model
    is fitted to them with fastPHASE, and knockoffs for the same data are
    generated from the fitted parameters. The result is checked with
    ``verify_exchangeability`` at a tolerance of 1e-1.
    """
    import os

    p = 10
    K = 3
    M = 2
    n = 1000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n)

    fastphase = "fastphase"  # Name of fastPhase executable

    # All intermediate files live in a private directory that is removed on
    # exit, which avoids leaking the descriptors returned by mkstemp() and
    # the files written by fastPHASE.
    with tempfile.TemporaryDirectory() as tmp_dir:
        Xfp_file = os.path.join(tmp_dir, "X.inp")
        out_path = os.path.join(tmp_dir, "fastphase_out")
        fp.writeXtoInp(X, Xfp_file, phased=True)
        fp.runFastPhase(Xfp_file,
                        out_path,
                        fastphase=fastphase,
                        phased=True,
                        K=5,
                        numit=25)

        r_file = out_path + "_rhat.txt"
        alpha_file = out_path + "_alphahat.txt"
        theta_file = out_path + "_thetahat.txt"
        char_file = out_path + "_origchars"

        # The fitted files must be read before the directory is cleaned up.
        hmm = fp.loadHMM(r_file,
                         alpha_file,
                         theta_file,
                         char_file,
                         phased=True)

    knockoffs = knockoffHaplotypes(hmm["r"],
                                   hmm["alpha"],
                                   hmm["theta"],
                                   seed=123)
    Xk = knockoffs.sample(X)
    verify_exchangeability(X, Xk, tolerance=1e-1)
Exemple #4
0
import numpy as np
from matplotlib import pyplot as plt
from SNPknock.fastphase import loadHMM
from SNPknock import models
from SNPknock import knockoffHMM, knockoffHaplotypes, knockoffGenotypes
import util, pdb

# Load the HMM parameters from the files under data/ twice:
#   - hmm      (compact=True):  its 'r', 'alpha' and 'theta' entries are used
#                               below to build the genotype knockoff generator
#   - hmm_full (compact=False): its 'pInit', 'Q' and 'pEmit' entries are used
#                               below to build the model that X is sampled from
r_file = "data/haplotypes_rhat.txt"
alpha_file = "data/haplotypes_alphahat.txt"
theta_file = "data/haplotypes_thetahat.txt"
char_file = "data/haplotypes_origchars"
hmm = loadHMM(r_file, alpha_file, theta_file, char_file, compact=True)
hmm_full = loadHMM(r_file, alpha_file, theta_file, char_file, compact=False)

# Sample n observations of X from the HMM given by the expanded parameters
n = 10
modelX = models.HMM(hmm_full['pInit'], hmm_full['Q'], hmm_full['pEmit'])
X = modelX.sample(n)

# Generate the knockoffs with the generic HMM algorithm (currently disabled)
#knockoffs = knockoffHMM(hmm_full['pInit'], hmm_full['Q'], hmm_full['pEmit'])
#Xk = knockoffs.sample(X)
#print("Generated knockoffs")

# Generate the knockoffs (genotypes) from the compact parameters
knockoffs_gen = knockoffGenotypes(hmm['r'], hmm['alpha'], hmm['theta'])
Xk = knockoffs_gen.sample(X)
print("Generated knockoffs (genotypes)")

# Generate the knockoffs (haplotypes)