def test_genotypes_exact():
    """
    Test whether genotype knockoffs are accurate under the fitted HMM

    An HMM is fitted with fastPHASE to training genotypes. Fresh test data are
    then sampled from the fitted model itself, so the knockoffs are generated
    with exactly the model that the test data follow.
    """
    import os  # local import: only needed to release the mkstemp descriptors

    p = 10
    K = 3
    M = 3
    n_train = 1000
    n_test = 100000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)
    # mkstemp returns an open OS-level descriptor together with the path; only
    # the path is used below, so close the descriptor right away instead of
    # leaking it for the lifetime of the test process.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file)
    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file, out_path, fastphase=fastphase, K=5, numit=25)
    # Parameter files are looked up under the output prefix with these suffixes
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"
    # Default load: provides the (r, alpha, theta) entries used for knockoffs
    hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
    knockoffs = knockoffGenotypes(hmm["r"],
                                  hmm["alpha"],
                                  hmm["theta"],
                                  seed=123)
    # Non-compact load: provides (pInit, Q, pEmit) so new data can be sampled
    hmm_hat = fp.loadHMM(r_file,
                         alpha_file,
                         theta_file,
                         char_file,
                         compact=False)
    modelX_hat = models.HMM(hmm_hat["pInit"], hmm_hat["Q"], hmm_hat["pEmit"])
    X_new = modelX_hat.sample(n_test)
    Xk_new = knockoffs.sample(X_new)
    verify_exchangeability(X_new, Xk_new, tolerance=1e-3)
def test_haplotypes_fastphase():
    """
    Test whether haplotype knockoffs with HMM fitted by fastPHASE are accurate

    The knockoffs are generated for the same data the model was fitted on, and
    exchangeability is checked with a loose tolerance (1e-1).
    """
    import os  # local import: only needed to release the mkstemp descriptors

    p = 10
    K = 3
    M = 2
    n = 1000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n)
    # mkstemp hands back an open descriptor plus the path; only the path is
    # needed, so the descriptor is closed immediately to avoid leaking it.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file, phased=True)
    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file,
                    out_path,
                    fastphase=fastphase,
                    phased=True,
                    K=5,
                    numit=25)
    # Parameter files are looked up under the output prefix with these suffixes
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"
    hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file, phased=True)
    knockoffs = knockoffHaplotypes(hmm["r"],
                                   hmm["alpha"],
                                   hmm["theta"],
                                   seed=123)
    Xk = knockoffs.sample(X)
    verify_exchangeability(X, Xk, tolerance=1e-1)
def test_haplotypes_hmm():
    """
    Test whether the specialized haplotype knockoff algorithm agrees with the
    general HMM knockoff algorithm

    Both generators are built from the same fastPHASE fit, the same variable
    groups and the same seed, and must return identical knockoffs.
    """
    import os  # local import: only needed to release the mkstemp descriptors

    p = 10
    K = 5
    M = 2
    n_train = 1000
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)
    # mkstemp hands back an open descriptor plus the path; only the path is
    # needed, so the descriptor is closed immediately to avoid leaking it.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file, phased=True)
    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file,
                    out_path,
                    fastphase=fastphase,
                    phased=True,
                    K=5,
                    numit=25)
    # Parameter files are looked up under the output prefix with these suffixes
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"
    # Consecutive variables are grouped three at a time (last group truncated)
    groups = np.repeat(np.arange(p), 3)[:p]
    # NOTE(review): this load does not pass phased=True, unlike the compact
    # load in test_haplotypes_fastphase -- confirm the flag is irrelevant here.
    hmm_compact = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
    hmm = fp.loadHMM(r_file,
                     alpha_file,
                     theta_file,
                     char_file,
                     compact=False,
                     phased=True)
    knockoffs = knockoffHMM(hmm["pInit"],
                            hmm["Q"],
                            hmm["pEmit"],
                            groups=groups,
                            seed=123)
    knockoffs_hap = knockoffHaplotypes(hmm_compact["r"],
                                       hmm_compact["alpha"],
                                       hmm_compact["theta"],
                                       groups=groups,
                                       seed=123)
    Xk = knockoffs.sample(X)
    Xk_compact = knockoffs_hap.sample(X)
    assert np.array_equal(
        Xk, Xk_compact), "Haplotype knockoffs do not match HMM knockoffs"
def test_HMM():
    """
    Check that knockoffs generated for a generic HMM pass the
    exchangeability verification
    """
    n_vars, n_states, n_emissions = 10, 4, 5
    n_obs = 100000
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states,
                                                      n_emissions)
    # The data come from the very model handed to the knockoff generator
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)
    generator = knockoffHMM(init_probs, transitions, emissions, seed=123)
    data_knockoffs = generator.sample(data)
    verify_exchangeability(data, data_knockoffs)
def test_HMM_groups():
    """
    Check that group knockoffs generated for a generic HMM pass the
    exchangeability verification for the same groups
    """
    n_vars, n_states, n_emissions = 10, 4, 5
    n_obs = 100000
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states,
                                                      n_emissions)
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)
    # Consecutive variables share a group label, three per group
    var_groups = np.repeat(np.arange(n_vars), 3)[:n_vars]
    generator = knockoffHMM(init_probs,
                            transitions,
                            emissions,
                            groups=var_groups,
                            seed=123)
    data_knockoffs = generator.sample(data)
    verify_exchangeability(data, data_knockoffs, groups=var_groups)
Exemple #6
0
    def __init__(self,
                 num_samples,
                 num_dim,
                 num_hidden_states,
                 num_emission_states=3,
                 **sampler_x_args):
        """
        Store the problem sizes, draw HMM parameters, build the HMM and sample.

        :param num_samples: stored as ``n_samples``
        :param num_dim: stored as ``dim``
        :param num_hidden_states: stored as ``n_hidden_states``
        :param num_emission_states: stored as ``n_emission_states`` (default 3)
        :param sampler_x_args: extra keyword arguments; accepted but not used
                               inside this method
        """
        self.n_samples, self.dim = num_samples, num_dim
        self.n_hidden_states = num_hidden_states
        self.n_emission_states = num_emission_states

        # Draw the model parameters once and keep them on the instance
        params = sample_parameters(num_dim, num_hidden_states,
                                   num_emission_states)
        self.parameters = params

        self.modelX = models.HMM(params['pInit'], params['Q'],
                                 params['pEmit'])

        # Sampling is triggered as part of construction
        self.sample()
def test_HMM_basic():
    """
    Smoke test for HMM knockoff generation: the output must be finite and
    must not change when every variable is placed in its own group
    """
    n_vars, n_states, n_emissions, n_obs = 50, 4, 5, 100
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states,
                                                      n_emissions)
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)

    # Reference knockoffs without any grouping
    ungrouped = knockoffHMM(init_probs, transitions, emissions,
                            seed=123).sample(data)

    # Same seed, but with one singleton group per variable
    singleton_groups = np.arange(n_vars)
    grouped = knockoffHMM(init_probs,
                          transitions,
                          emissions,
                          groups=singleton_groups,
                          seed=123).sample(data)

    assert np.array_equal(
        ungrouped, grouped), "Knockoffs with trivial groups do not match"
    assert np.isfinite(ungrouped).all(), "Knockoffs are not finite"
# Script fragment: fills in random HMM parameters, samples data from the HMM,
# generates knockoffs and runs diagnostic comparisons.
# NOTE(review): p, K, M and Q are defined before this fragment -- their
# definitions are not visible here.

# Emission array with one (M, K) slice per position j
pEmit = np.zeros((p,M,K))
# One uniform draw in [0, 10) per position, added to the diagonals below
gamma = np.random.uniform(low=0, high=10, size=p)
# Only the first p-1 slices of Q are filled in
for j in range(p-1):    
    Q[j,:,:] = np.resize(np.random.uniform(size=K*K),(K,K))
    Q[j,:,:] += np.diag([gamma[j]]*K) 
    # Normalize so that every row of Q[j] sums to one
    Q[j,:,:] /= np.sum(Q[j,:,:],1)[:,None]
for j in range(p):
    pEmit[j,:,:] = np.resize(np.random.uniform(size=M*K),(M,K))    
    # NOTE(review): np.diag(...) is K x K while pEmit[j] is M x K, so this
    # addition needs compatible shapes (e.g. M == K) -- confirm upstream values
    pEmit[j,:,:] += np.diag([gamma[j]]*K) 
    # Normalize so that every column of pEmit[j] sums to one
    pEmit[j,:,:] /= np.sum(pEmit[j,:,:],0)
# Initial distribution puts all its mass on index 0
pInit = np.zeros((K,))
pInit[0] = 1

# Sample X
n=10000
modelX = models.HMM(pInit, Q, pEmit)
X = modelX.sample(n)

# Generate the knockoffs
knockoffs = knockoffHMM(pInit, Q, pEmit)
Xk = knockoffs.sample(X)

# Plot paths
util.plotPaths(X,Xk)

# Compare original variables and knockoffs 
util.compare_marginals(X,Xk)
util.compare_cons_corr(X,Xk)
util.compare_cross_corr(X,Xk)
util.compare_cross_corr(X,Xk,dist=0)
Exemple #9
0
from SNPknock.fastphase import loadHMM
from SNPknock import models
from SNPknock import knockoffHMM, knockoffHaplotypes, knockoffGenotypes
import util, pdb

# Load HMM
# The same four parameter files are loaded twice: the compact form supplies the
# 'r', 'alpha' and 'theta' entries used by the genotype knockoffs below, the
# non-compact form supplies 'pInit', 'Q' and 'pEmit' used to sample X.
r_file = "data/haplotypes_rhat.txt"
alpha_file = "data/haplotypes_alphahat.txt"
theta_file = "data/haplotypes_thetahat.txt"
char_file = "data/haplotypes_origchars"
hmm = loadHMM(r_file, alpha_file, theta_file, char_file, compact=True)
hmm_full = loadHMM(r_file, alpha_file, theta_file, char_file, compact=False)

# Sample X
n = 10
modelX = models.HMM(hmm_full['pInit'], hmm_full['Q'], hmm_full['pEmit'])
X = modelX.sample(n)

# Generate the knockoffs
# (general HMM variant, currently disabled)
#knockoffs = knockoffHMM(hmm_full['pInit'], hmm_full['Q'], hmm_full['pEmit'])
#Xk = knockoffs.sample(X)
#print("Generated knockoffs")

# Generate the knockoffs (genotypes)
knockoffs_gen = knockoffGenotypes(hmm['r'], hmm['alpha'], hmm['theta'])
Xk = knockoffs_gen.sample(X)
print("Generated knockoffs (genotypes)")

# Generate the knockoffs (haplotypes)
# Entries equal to 2 are replaced by 1.
# NOTE(review): H = X binds a second name to the same object; if X is a NumPy
# array the masked assignment below also modifies X in place -- confirm this
# is intended, otherwise a copy is needed.
H = X
H[X == 2] = 1