Exemple #1
0
def main(args):
    """Simulate one set of effect sizes and the matching noiseless phenotypes.

    Seeds NumPy's global RNG with ``args.beta_num``, draws effect sizes from
    the simulation's architecture, accumulates ``X.dot(beta)`` over the
    dataset's SNP slices, rescales the phenotypes and the effect sizes so the
    phenotype standard deviation equals ``sqrt(sim.h2g)``, and pickles both
    arrays with protocol 2.

    Args:
        args: namespace providing ``sim_name`` and ``beta_num``.
    """
    np.random.seed(args.beta_num)
    sim = SumstatSimulation(args.sim_name)
    arch = Architecture(sim.architecture)
    d = Dataset(sim.dataset)

    # sample the beta (only the first column of the draw is used)
    beta = arch.draw_effect_sizes(sim.dataset, sim.h2g)[:, 0]

    # compute noiseless phenotypes slice by slice so that only one slice of
    # genotypes is held in memory at a time
    Y = np.zeros(d.N)
    t0 = time()
    for s in d.slices():
        # X will be N x M
        print(int(time() - t0), ': getting genotypes from file. SNPs', s)
        X = d.get_standardized_genotypes(s)
        print('computing phenotypes. SNPs', s)
        Y += X.dot(beta[s[0]:s[1]])
        del X

    # normalize the Y and the beta to the desired heritability
    normalization = np.std(Y) / np.sqrt(sim.h2g)
    # std(Y) == 0 yields 0, and 0 / 0 (h2g == 0 as well) yields NaN; dividing
    # by either would fill Y and beta with NaN, so leave them unscaled.
    if normalization == 0 or np.isnan(normalization):
        normalization = 1
    Y /= normalization
    beta /= normalization

    # write the betas and the noiseless phenotypes; the handles returned by
    # sim are assumed to be ordinary file objects and are closed explicitly
    # so the data is flushed before the job exits
    with sim.beta_file(args.beta_num, 'wb') as beta_out:
        pickle.dump(beta, beta_out, 2)
    with sim.noiseless_Y_file(args.beta_num, 'wb') as y_out:
        pickle.dump(Y, y_out, 2)
Exemple #2
0
def main(args):
    """Draw effect sizes for one replicate and write its noiseless phenotypes.

    The RNG seed is ``args.beta_num``. Effect sizes come from
    ``Architecture.draw_effect_sizes``; phenotypes are the sum over SNP slices
    of ``X.dot(beta_slice)``. Both arrays are divided by
    ``std(Y) / sqrt(sim.h2g)`` and then pickled (protocol 2) to the files
    handed out by the simulation object.

    Args:
        args: namespace providing ``sim_name`` and ``beta_num``.
    """
    np.random.seed(args.beta_num)
    sim = SumstatSimulation(args.sim_name)
    arch = Architecture(sim.architecture)
    d = Dataset(sim.dataset)

    # sample the beta (only the first column of the draw is used)
    beta = arch.draw_effect_sizes(sim.dataset, sim.h2g)[:, 0]

    # compute noiseless phenotypes slice by slice
    Y = np.zeros(d.N)
    t0 = time()
    for s in d.slices():
        # X will be N x M
        print(int(time() - t0), ": getting genotypes from file. SNPs", s)
        X = d.get_standardized_genotypes(s)
        print("computing phenotypes. SNPs", s)
        Y += X.dot(beta[s[0] : s[1]])
        del X  # release the genotype slice before loading the next one

    # normalize the Y and the beta to the desired heritability
    normalization = np.std(Y) / np.sqrt(sim.h2g)
    # A zero scale (std(Y) == 0) or a NaN scale (0 / 0 when h2g is also 0)
    # would turn Y and beta into NaN; fall back to no rescaling instead.
    if normalization == 0 or np.isnan(normalization):
        normalization = 1
    Y /= normalization
    beta /= normalization

    # write the betas and the noiseless phenotypes, closing each handle as
    # soon as it has been written (handles are assumed to be file objects)
    with sim.beta_file(args.beta_num, "wb") as beta_out:
        pickle.dump(beta, beta_out, 2)
    with sim.noiseless_Y_file(args.beta_num, "wb") as y_out:
        pickle.dump(Y, y_out, 2)
Exemple #3
0
def submit(args):
    """Hand the sim_betas script to ``bsub.submit`` for one simulation.

    The job name carries a ``[1-<num_betas>]`` suffix and the script receives
    ``$LSB_JOBINDEX`` as its ``--beta_num`` (presumably an LSF job array with
    one element per beta; confirm against ``bsub.submit``).

    Args:
        args: namespace providing ``sim_name``.
    """
    simulation = SumstatSimulation(args.sim_name)

    # arguments forwarded to the script's "main" sub-command
    script_args = ["--sim_name", args.sim_name]
    script_args += ["main", "--beta_num", "$LSB_JOBINDEX"]

    log_path = simulation.path() + ".sim_betas.%I.out"
    command = ["python", "-u", paths.code + "sim/sim_betas.py"] + script_args
    job_name = "simbetas[1-" + str(simulation.num_betas) + "]"

    bsub.submit(command, log_path, jobname=job_name, memory_GB=5)
Exemple #4
0
def submit(args):
    """Submit the sim_betas script through ``bsub.submit``.

    Builds the command line ``python -u <paths.code>sim/sim_betas.py
    --sim_name <name> main --beta_num $LSB_JOBINDEX`` and submits it with a
    job name of the form ``simbetas[1-<num_betas>]`` and 5 GB of memory.

    Args:
        args: namespace providing ``sim_name``.
    """
    sim_obj = SumstatSimulation(args.sim_name)

    forwarded = ['--sim_name', args.sim_name]
    forwarded.extend(['main', '--beta_num', '$LSB_JOBINDEX'])

    # output path handed to bsub.submit as its second positional argument
    out_path = sim_obj.path() + '.sim_betas.%I.out'

    bsub.submit(
        ['python', '-u', paths.code + 'sim/sim_betas.py'] + forwarded,
        out_path,
        jobname='simbetas[1-' + str(sim_obj.num_betas) + ']',
        memory_GB=5,
    )
Exemple #5
0
def main(args):
    """Simulate one noisy phenotype sample and its marginal summary statistics.

    Seeds NumPy's global RNG from ``args.beta_num`` and ``args.sample_num``,
    loads the noiseless phenotypes for that beta, draws ``sim.sample_size``
    individuals with ``np.random.choice``, adds Gaussian noise with variance
    ``1 - sim.h2g``, optionally projects covariates out of Y and X, and
    computes ``X.T.dot(Y) / sim.sample_size`` for every SNP slice. The chosen
    indices, the noisy Y and the summary statistics are pickled (protocol 2).

    Args:
        args: namespace providing ``sim_name``, ``beta_num`` and
            ``sample_num``.
    """
    np.random.seed(args.beta_num + args.sample_num * 10000)
    sim = SumstatSimulation(args.sim_name)
    d = Dataset(sim.dataset)
    pretty.print_namespace(sim)
    print()

    # read in noiseless phenotypes (handle assumed to be a file object)
    with sim.noiseless_Y_file(args.beta_num) as y_in:
        Y = pickle.load(y_in)

    # choose individuals and create ensemble of Ys
    indices = np.random.choice(Y.shape[0], size=(sim.sample_size, ))
    Y = Y[indices]

    # compute how much noise to add
    sigma2e = 1 - sim.h2g
    print('adding noise. sigma2e =', sigma2e)
    Y += np.sqrt(sigma2e) * np.random.randn(*Y.shape)

    if sim.condition_on_covariates:
        print('projecting covariates out of Y')
        Y = d.project_out_covariates(Y, covariates=d.covariates[indices])

    alphahat = np.zeros(d.M)
    t0 = time()

    # An explicit loop is required here: map() is lazy on Python 3, so
    # mapping a side-effecting function over the slices would never run it
    # and alphahat would stay all zeros.
    for s in d.slices():
        # X will be N x M
        print(int(time() - t0), ': getting genotypes from file. SNPs', s)
        X = d.get_standardized_genotypes(s)[indices]

        if sim.condition_on_covariates:
            print(int(time() - t0), ': projecting out covariates')
            X = d.project_out_covariates(X, covariates=d.covariates[indices])

        print(int(time() - t0), ': computing sumstats. SNPs', s)
        alphahat[s[0]:s[1]] = X.T.dot(Y) / sim.sample_size
        del X  # release the genotype slice before loading the next one

    # write output, closing each handle once its payload has been written
    with sim.individuals_file(args.beta_num, args.sample_num, 'wb') as out:
        pickle.dump(indices, out, 2)
    with sim.noisy_Y_file(args.beta_num, args.sample_num, 'wb') as out:
        pickle.dump(Y, out, 2)
    with sim.sumstats_file(args.beta_num, args.sample_num, 'wb') as out:
        pickle.dump(alphahat, out, 2)
Exemple #6
0
def main(args):
    """Generate noisy phenotypes and per-SNP summary statistics for one sample.

    Steps, in order: seed the RNG with
    ``args.beta_num + args.sample_num * 10000``; load the noiseless
    phenotypes; pick ``sim.sample_size`` individuals via ``np.random.choice``;
    add noise with variance ``1 - sim.h2g``; optionally project out
    covariates; fill ``alphahat`` slice by slice with
    ``X.T.dot(Y) / sim.sample_size``; pickle the indices, the noisy Y and
    ``alphahat`` with protocol 2.

    Args:
        args: namespace providing ``sim_name``, ``beta_num`` and
            ``sample_num``.
    """
    np.random.seed(args.beta_num + args.sample_num * 10000)
    sim = SumstatSimulation(args.sim_name)
    d = Dataset(sim.dataset)
    pretty.print_namespace(sim)
    print()

    # read in noiseless phenotypes (handle assumed to be a file object)
    with sim.noiseless_Y_file(args.beta_num) as y_in:
        Y = pickle.load(y_in)

    # choose individuals and create ensemble of Ys
    indices = np.random.choice(Y.shape[0], size=(sim.sample_size,))
    Y = Y[indices]

    # compute how much noise to add
    sigma2e = 1 - sim.h2g
    print('adding noise. sigma2e =', sigma2e)
    Y += np.sqrt(sigma2e) * np.random.randn(*Y.shape)

    if sim.condition_on_covariates:
        print('projecting covariates out of Y')
        Y = d.project_out_covariates(Y, covariates=d.covariates[indices])

    alphahat = np.zeros(d.M)
    t0 = time()

    def compute_sumstats_for_slice(s):
        """Fill ``alphahat[s[0]:s[1]]`` from the genotypes of slice ``s``."""
        # X will be N x M
        print(int(time() - t0), ': getting genotypes from file. SNPs', s)
        X = d.get_standardized_genotypes(s)[indices]

        if sim.condition_on_covariates:
            print(int(time() - t0), ': projecting out covariates')
            X = d.project_out_covariates(X, covariates=d.covariates[indices])

        print(int(time() - t0), ': computing sumstats. SNPs', s)
        alphahat[s[0]:s[1]] = X.T.dot(Y) / sim.sample_size
        del X

    # Iterate explicitly: on Python 3 a bare map() call is lazy and would
    # never invoke the helper, leaving alphahat all zeros.
    for s in d.slices():
        compute_sumstats_for_slice(s)

    def write_output():
        """Pickle indices, noisy Y and alphahat, closing each handle."""
        targets = (
            (indices, sim.individuals_file),
            (Y, sim.noisy_Y_file),
            (alphahat, sim.sumstats_file),
        )
        for payload, opener in targets:
            with opener(args.beta_num, args.sample_num, 'wb') as out:
                pickle.dump(payload, out, 2)

    write_output()
Exemple #7
0
def submit(args):
    """Submit one sim_sumstats job per beta of the named simulation.

    For every ``beta_num`` in ``1..sim.num_betas`` this calls ``bsub.submit``
    with a job name ``simsumstats<beta_num>[1-<num_samples_per_beta>]`` and
    passes ``$LSB_JOBINDEX`` as ``--sample_num`` (presumably an LSF job array
    with one element per sample; confirm against ``bsub.submit``).

    Args:
        args: namespace providing ``sim_name``.
    """
    sim = SumstatSimulation(args.sim_name)

    def submit_beta(beta_num):
        """Submit the job for a single beta."""
        my_args = [
            '--sim_name', args.sim_name, 'main', '--beta_num',
            str(beta_num), '--sample_num', '$LSB_JOBINDEX'
        ]
        outfilepath = \
            sim.path_to_beta(beta_num) + \
            '.sim_sumstats.%I.out'

        bsub.submit(
            ['python', '-u', paths.code + 'sim/sim_sumstats.py'] + my_args,
            outfilepath,
            jobname='simsumstats' + str(beta_num) + '[1-' +
            str(sim.num_samples_per_beta) + ']',
            # previously memory_GB=10.5
            memory_GB=13)

    # Use an explicit loop: map() is lazy on Python 3, so calling it only for
    # its side effects would submit nothing.
    for beta_num in range(1, sim.num_betas + 1):
        submit_beta(beta_num)
def run_on_batch(est, args):
    """Run an estimator on one batch of a simulation and save its results.

    Prints the simulation's namespace, the batch number and the estimator,
    then delegates to ``est.run_and_save_results``.

    Args:
        est: estimator object exposing ``run_and_save_results(batch, sim)``.
        args: namespace providing ``sim_name`` and ``batch_num``.
    """
    simulation = SumstatSimulation(args.sim_name)

    # describe what is about to run
    pretty.print_namespace(simulation)
    print()
    print('batch=', args.batch_num)
    print(est)

    est.run_and_save_results(args.batch_num, simulation)
Exemple #9
0
 def __init__(self, simulation_names):
     """Build one ``SumstatSimulation`` per name, kept in input order.

     Args:
         simulation_names: iterable of simulation names.
     """
     self.simulations = [
         SumstatSimulation(name) for name in simulation_names
     ]