Example #1
0
    def test_G0_has_reader(self):
        """Run single_snp twice with the Bed reader supplied as kernel SNPs.

        The first run passes the reader as G0 with mixing=0; the second
        passes an identity kernel as G0, the reader as G1, and mixing=1.
        Each result is handed to compare_files with the "one" reference.
        """
        logging.info("TestSingleSnp test_G0_has_reader")
        bed = Bed(self.bedbase)
        phenotype, covariates = self.phen_fn, self.cov_fn
        out_path = self.file_name("G0_has_reader")

        # Reader given directly as G0, mixing=0.
        via_g0 = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=bed,
            mixing=0,
            leave_out_one_chrom=False,
            output_file_name=out_path,
        )
        self.compare_files(via_g0, "one")

        # Identity kernel as G0, reader as G1, mixing=1.
        via_g1 = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=KernelIdentity(bed.iid),
            G1=bed,
            mixing=1,
            leave_out_one_chrom=False,
            output_file_name=out_path,
        )
        self.compare_files(via_g1, "one")
Example #2
0
    def test_file_cache(self):
        """Run single_snp with a cache file, then rerun with G0=None and G1=None.

        Any file already present at the cache path is removed before the
        first run. Both runs use the same cache_file path and both results
        are handed to compare_files with the "G1" reference.
        """
        logging.info("TestSingleSnp test_file_cache")
        from pysnptools.snpreader import Bed
        bed = Bed(self.bedbase)
        phenotype, covariates = self.phen_fn, self.cov_fn

        out_path = self.file_name("G1")
        cache_path = self.file_name("cache_file") + ".npz"
        # Remove any cache file left at this path by an earlier run.
        if os.path.exists(cache_path):
            os.remove(cache_path)

        # First run: both kernels are given explicitly.
        with_kernels = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=bed[:, 10:100],
            G1=bed[:, 100:200],
            mixing=.5,
            output_file_name=out_path,
            cache_file=cache_path,
        )
        self.compare_files(with_kernels, "G1")

        # Second run: no kernels are passed, only the same cache_file path.
        without_kernels = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=None,
            G1=None,
            mixing=.5,
            output_file_name=out_path,
            cache_file=cache_path,
        )
        self.compare_files(without_kernels, "G1")
Example #3
0
    def test_file_cache(self):
        """Run the same cached single_snp call twice and check both results.

        Any file already present at the cache path is removed first. The two
        runs use identical arguments, including cache_file, and each result
        is handed to compare_files with the "G1" reference.
        """
        logging.info("TestSingleSnp test_file_cache")
        bed = Bed(self.bedbase)
        phenotype, covariates = self.phen_fn, self.cov_fn

        out_path = self.file_name("G1")
        cache_path = self.file_name("cache_file") + ".npz"
        # Remove any cache file left at this path by an earlier run.
        if os.path.exists(cache_path):
            os.remove(cache_path)

        # Two passes with identical arguments.
        # NOTE(review): presumably the second pass exercises the cache written
        # by the first -- confirm against single_snp.
        for _ in range(2):
            frame = single_snp(
                test_snps=bed[:, :10],
                pheno=phenotype,
                covar=covariates,
                G0=bed[:, 10:100],
                G1=bed[:, 100:200],
                mixing=.5,
                leave_out_one_chrom=False,
                output_file_name=out_path,
                cache_file=cache_path,
            )
            self.compare_files(frame, "G1")
Example #4
0
    def test_gb_goal(self):
        """Run single_snp with GB_goal=0 and GB_goal=.12.

        Each run writes to its own output file and its result is handed to
        compare_files with the same "one" reference.
        """
        logging.info("TestSingleSnp test_gb_goal")
        bed = Bed(self.bedbase)
        phenotype, covariates = self.phen_fn, self.cov_fn

        # (output file tag, GB_goal value) for each run, in order.
        for tag, gb_goal in (("gb_goal", 0), ("gb_goal2", .12)):
            out_path = self.file_name(tag)
            frame = single_snp(
                test_snps=bed[:, :10],
                pheno=phenotype,
                covar=covariates,
                G0=bed,
                mixing=0,
                leave_out_one_chrom=False,
                GB_goal=gb_goal,
                output_file_name=out_path,
            )
            self.compare_files(frame, "one")
Example #5
0
    def test_file_cache(self):
        """Run single_snp with a cache file, first with kernels and then without.

        Any file already present at the cache path is removed first. The
        second run passes G0=None and G1=None with the same cache_file path.
        Both results are handed to compare_files with the "G1" reference.
        """
        logging.info("TestSingleSnp test_file_cache")
        from pysnptools.snpreader import Bed
        bed = Bed(self.bedbase)

        out_path = self.file_name("G1")
        cache_path = self.file_name("cache_file") + ".npz"
        # Remove any cache file left at this path by an earlier run.
        if os.path.exists(cache_path):
            os.remove(cache_path)

        # Arguments that are the same for both runs.
        common = {
            "pheno": self.phen_fn,
            "covar": self.cov_fn,
            "mixing": .5,
            "output_file_name": out_path,
            "cache_file": cache_path,
        }

        # First run supplies both kernels; second run supplies neither.
        kernel_pairs = (
            (bed[:, 10:100], bed[:, 100:200]),
            (None, None),
        )
        for g0, g1 in kernel_pairs:
            frame = single_snp(test_snps=bed[:, :10], G0=g0, G1=g1, **common)
            self.compare_files(frame, "G1")
    def test_match_cpp(self):
        r'''
        Check single_snp p-values against a stored reference table.

        match
            FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100

        The similarity SNPs are supplied once as G0 (identity as G1) and once
        as G1 (identity as G0); for each arrangement single_snp is run with
        h2=.5 and with log_delta=0. Every resulting p-value must be within a
        relative difference of .035 of the reference value.
        '''
        # The docstring is a raw string because the command line above
        # contains backslashes ("\b" would otherwise become a backspace).
        logging.info("TestSingleSnp test_match_cpp")
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        # The reference table does not depend on the kernel arrangement, so it
        # is loaded once up front rather than once per result frame.
        referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
        reference = pd.read_table(referenceOutfile,sep="\t") # We've manually removed all comments and blank lines from this file

        for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
            frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
            frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
            for frame in [frame_h2, frame_log_delta]:
                assert len(frame) == len(reference)
                for _, row in reference.iterrows():
                    sid = row.SNP
                    # Look up this SNP's p-value in the computed frame.
                    pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
                    reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
                    assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
Example #7
0
def execute_fs(test_snps, pheno, G0, covar):
    """
    run feature selection

    Writes the phenotype and the covariates to temporary text files in the
    current directory, runs the unconditioned feature selection on G0 with
    those files, and then runs single_snp on test_snps using the selected
    SNPs as G0 -- once without and once with covariates. The temporary
    files are removed again once feature selection has finished.

    :param test_snps: SNPs to test; passed through to single_snp
    :param pheno: mapping whose "vals" entry holds the phenotype values
    :param G0: SNP reader providing ``iid``, ``sid`` and column indexing
    :param covar: mapping whose "vals" entry is indexed as ``[:, i]``,
        one column per covariate
    :return: ``(result, fs_result)``; ``result`` maps "fs_all" and
        "fs_all_cov" to the PValue arrays sorted by Chr and ChrPos,
        ``fs_result`` maps "result_uncond_all" to the raw feature-selection
        output
    """
    import os
    from collections import OrderedDict

    result = {}
    fs_result = {}

    # fs unconditioned
    ########################
    tmp_uuid = str(uuid.uuid4())[0:13]
    out_fn = "tmp_pheno_%s.txt" % (tmp_uuid)
    cov_fn = "tmp_cov_%s.txt" % (tmp_uuid)
    try:
        out_data = pd.DataFrame({
            "id1": G0.iid[:, 0],
            "id2": G0.iid[:, 1],
            "y": pheno["vals"]
        })
        out_data.to_csv(out_fn, sep=" ", header=False, index=False)

        # write out covariates
        items = [
            ('id1', G0.iid[:, 0]),
            ('id2', G0.iid[:, 1]),
        ]
        items += [("pc_%i" % i, covar["vals"][:, i])
                  for i in range(covar["vals"].shape[1])]
        # OrderedDict keeps the column order of `items`; DataFrame.from_items
        # no longer exists in current pandas.
        cov_df = pd.DataFrame.from_dict(OrderedDict(items))
        cov_df.to_csv(cov_fn, sep=" ", header=False, index=False)

        #TODO: fix include_all!!
        fsd = create_feature_selection_distributable(G0,
                                                     out_fn,
                                                     None,
                                                     0,
                                                     "fs_out",
                                                     include_all=False,
                                                     cov_fn=cov_fn)
        fs_result["result_uncond_all"] = Local().run(fsd)
    finally:
        # The text files are only inputs to the feature-selection run above.
        for tmp_fn in (out_fn, cov_fn):
            if os.path.exists(tmp_fn):
                os.remove(tmp_fn)

    # Only the selected SNP ids are needed from the feature-selection output.
    _best_k, _best_delta, _best_obj, best_snps = fs_result["result_uncond_all"]
    fs_idx = argintersect_left(G0.sid, best_snps)

    G_fs = G0[:, fs_idx]

    sort_keys = ["Chr", "ChrPos"]
    frame_no_cov = single_snp(test_snps, pheno, G0=G_fs)
    result["fs_all"] = frame_no_cov.sort_values(sort_keys)["PValue"].values
    frame_cov = single_snp(test_snps, pheno, G0=G_fs, covar=covar)
    result["fs_all_cov"] = frame_cov.sort_values(sort_keys)["PValue"].values

    return result, fs_result
    def test_leave_one_out_with_prekernels(self):
        """Run single_snp with a precomputed kernel per chromosome passed as K0.

        For every chromosome value in the Bed file a kernel is built from
        the SNPs on all other chromosomes and standardized with DiagKtoN.
        The chrom -> kernel dict is passed as K0 and the result is handed to
        compare_files with the "one_looc" reference.
        """
        logging.info(
            "TestSingleSnpLeaveOutOneChrom test_leave_one_out_with_prekernels")
        from pysnptools.kernelstandardizer import DiagKtoN
        bed = Bed(self.bedbase, count_A1=False)
        phenotype, covariates = self.phen_fn, self.cov_fn

        kernel_by_chrom = {}
        # Kernels are built with ARRAY_MODULE set to 'numpy' in os.environ.
        with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}):
            for chrom in np.unique(bed.pos[:, 0]):
                # Create a kernel from the SNPs not used in testing
                off_chrom = bed[:, bed.pos[:, 0] != chrom]
                raw_kernel = off_chrom.read_kernel(standardizer=Unit(),
                                                   block_size=500)
                # improves the kernel numerically by making its diagonal
                # sum to iid_count
                kernel_by_chrom[chrom] = raw_kernel.standardize(DiagKtoN())

        out_path = self.file_name("one_looc_prekernel")
        frame = single_snp(
            bed,
            phenotype,
            covar=covariates,
            K0=kernel_by_chrom,
            output_file_name=out_path,
            count_A1=False,
        )

        self.compare_files(frame, "one_looc")
Example #9
0
def execute_dual_fs(test_snps, pheno, G0, covar):
    """
    implementation of dual-kernel feature selection

    Runs in-sample feature selection on the standardized G0 SNPs and then
    runs single_snp with G0 as the first kernel, the selected SNPs as G1,
    and the mixing value returned by the selection.

    :param test_snps: SNPs to test; passed through to single_snp
    :param pheno: mapping whose "vals" entry holds the phenotype values
    :param G0: SNP reader providing ``read()`` and column indexing
    :param covar: mapping whose "vals" entry holds the covariate values
    :return: ``(result, fs_result)``; ``result["full_fs_low"]`` is the
        PValue array sorted by Chr and ChrPos, and
        ``fs_result["insample_cond_full"]`` is the raw output of
        ``run_select``
    """

    result = {}
    fs_result = {}

    # extract data (the test SNPs are not read here: nothing below uses
    # their values directly, single_snp reads them itself)
    G_train = G0.read().standardize().val

    # fs conditioned on full kernel
    select = FeatureSelectionInSample(max_log_k=7, order_by_lmm=True)
    fs_result["insample_cond_full"] = select.run_select(G_train,
                                                        G_train,
                                                        pheno["vals"],
                                                        cov=covar["vals"])
    best_k, fs_idx, best_mix, _best_delta = fs_result["insample_cond_full"]
    # Single pre-formatted string: prints the same text under Python 2 and 3.
    print("best_k: {0} , best_mix: {1}".format(best_k, best_mix))

    # set up foreground kernel
    G1 = G0[:, fs_idx]

    frame = single_snp(test_snps,
                       pheno,
                       G0=G0,
                       covar=covar,
                       G1=G1,
                       mixing=best_mix)
    # sort_values/.values replace DataFrame.sort/as_matrix, which current
    # pandas no longer provides.
    result["full_fs_low"] = frame.sort_values(["Chr", "ChrPos"])["PValue"].values

    return result, fs_result
Example #10
0
    def test_linreg(self):
        """Compare single_snp with an identity G0 against single_snp_linreg.

        The single_snp result is cut down to six columns before being handed
        to compare_files; both results use the "linreg" reference.
        """
        logging.info("TestSingleSnp test_linreg")
        bed = Bed(self.bedbase)
        phenotype, covariates = self.phen_fn, self.cov_fn
        out_path = self.file_name("linreg")

        # single_snp with an identity kernel as G0 and mixing=0.
        lmm_frame = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=KernelIdentity(iid=bed.iid),
            mixing=0,
            leave_out_one_chrom=False,
            output_file_name=out_path,
        )
        # Keep only these columns for the comparison.
        kept_columns = [
            'sid_index', 'SNP', 'Chr', 'GenDist', 'ChrPos', 'PValue'
        ]
        lmm_frame = lmm_frame[kept_columns]
        self.compare_files(lmm_frame, "linreg")

        # Same inputs through single_snp_linreg.
        linreg_frame = single_snp_linreg(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            output_file_name=out_path,
        )
        self.compare_files(linreg_frame, "linreg")
Example #11
0
def test_single_snp(args):
    """Run FastLMM single_snp for each trait and save the result tables.

    For each of 'trait1', 'trait2' and 'trait3' a tab-separated phenotype
    file is written into ``args.output_dir``, single_snp is run with it, and
    the result frame is stored with ``to_hdf`` under the trait name. When
    ``args.manhattan`` is true a Manhattan plot is saved as a PDF as well.

    :param args: object with the attributes ``phenotype_file``,
        ``sample_indices_file`` (may be None), ``snp_file``, ``k0_file``,
        ``output_dir``, ``GB_goal`` and ``manhattan``
    """
    from pysnptools.snpreader import Pheno
    from fastlmm.association import single_snp
    from utils import read_hdf5_dataset
    import matplotlib
    # The backend is selected before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import fastlmm.util.util as flutil

    logger.info('read phenotypes from file: ' + args.phenotype_file)
    phenotypes = pd.read_table(args.phenotype_file)
    # Two identical columns built from the 'id' column.
    iid = np.repeat(phenotypes['id'].values.astype('S')[:, np.newaxis],
                    2,
                    axis=1)
    if args.sample_indices_file is not None:
        logger.info('read indices from file: ' + args.sample_indices_file)
        sample_indices = read_hdf5_dataset(args.sample_indices_file)
    else:
        # Default to the rows whose 'type' is 'training'.
        sample_indices = np.nonzero(
            (phenotypes['type'] == 'training').values)[0]
    logger.info('read SNP file (for test): ' + args.snp_file)
    test_snps = get_snpdata(iid, args.snp_file, sample_indices=sample_indices)
    logger.info('read SNP file (for K0): ' + args.k0_file)
    K0 = get_snpdata(iid, args.k0_file)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    df_pheno = phenotypes[phenotypes['type'] == 'training'].copy()
    df_pheno['fid'] = df_pheno['id']
    df_pheno['iid'] = df_pheno['id']
    traits = ('trait1', 'trait2', 'trait3')
    for trait in traits:
        pheno_file = os.path.join(args.output_dir, 'pheno.%s.txt' % trait)
        logger.info('create Pheno file: ' + pheno_file)
        df_pheno[['fid', 'iid', trait]].to_csv(pheno_file,
                                               index=False,
                                               sep='\t',
                                               header=False)
        pheno = Pheno(pheno_file)
        logger.info('run FastLMM for single SNP test for %s' % trait)
        results_df = single_snp(test_snps,
                                pheno,
                                K0=K0,
                                count_A1=True,
                                GB_goal=args.GB_goal)
        result_file = os.path.join(args.output_dir, 'single_snp.' + trait)
        logger.info('save results to file: ' + result_file)
        # The HDF key is passed by keyword, which every pandas version accepts.
        results_df.to_hdf(result_file, key=trait)

        if args.manhattan:
            plot_file = os.path.join(args.output_dir,
                                     'manhattan.%s.pdf' % trait)
            logger.info('create Manhattan plot: ' + plot_file)
            plt.clf()
            # Column selection + .values replaces DataFrame.as_matrix(columns),
            # which current pandas no longer provides.
            plot_data = results_df[["Chr", "ChrPos", "PValue"]].values
            flutil.manhattan_plot(plot_data,
                                  pvalue_line=1e-5,
                                  xaxis_unit_bp=False)
            plt.savefig(plot_file)
Example #12
0
def execute_dual_fs(test_snps, pheno, G0, covar):
    """
    implementation of dual-kernel feature selection

    Runs in-sample feature selection on the standardized G0 SNPs and then
    runs single_snp with G0 as the first kernel, the selected SNPs as G1,
    and the mixing value returned by the selection.

    :param test_snps: SNPs to test; passed through to single_snp
    :param pheno: mapping whose "vals" entry holds the phenotype values
    :param G0: SNP reader providing ``read()`` and column indexing
    :param covar: mapping whose "vals" entry holds the covariate values
    :return: ``(result, fs_result)``; ``result["full_fs_low"]`` is the
        PValue array sorted by Chr and ChrPos, and
        ``fs_result["insample_cond_full"]`` is the raw output of
        ``run_select``
    """

    result = {}
    fs_result = {}

    # extract data (the test SNPs are not read here: nothing below uses
    # their values directly, single_snp reads them itself)
    G_train = G0.read().standardize().val

    # fs conditioned on full kernel
    select = FeatureSelectionInSample(max_log_k=7, order_by_lmm=True)
    fs_result["insample_cond_full"] = select.run_select(G_train, G_train, pheno["vals"], cov=covar["vals"])
    best_k, fs_idx, best_mix, _best_delta = fs_result["insample_cond_full"]
    # Single pre-formatted string: prints the same text under Python 2 and 3.
    print("best_k: {0} , best_mix: {1}".format(best_k, best_mix))

    # set up foreground kernel
    G1 = G0[:,fs_idx]

    frame = single_snp(test_snps, pheno, G0=G0, covar=covar, G1=G1, mixing=best_mix)
    # sort_values/.values replace DataFrame.sort/as_matrix, which current
    # pandas no longer provides.
    result["full_fs_low"] = frame.sort_values(["Chr", "ChrPos"])["PValue"].values

    return result, fs_result
Example #13
0
def execute_lmm(test_snps, pheno, G0, covar):
    """
    Run single_snp with G0 and covar and collect the sorted p-values.

    :param test_snps: SNPs to test; passed through to single_snp
    :param pheno: phenotype; passed through to single_snp
    :param G0: passed to single_snp as ``G0``
    :param covar: passed to single_snp as ``covar``
    :return: ``(result, fs_result)``; ``result["full"]`` is the PValue array
        sorted by Chr and ChrPos, ``fs_result`` is always an empty dict
    """

    result = {}
    fs_result = {}

    frame = single_snp(test_snps, pheno, G0=G0, covar=covar)
    # sort_values/.values replace DataFrame.sort/as_matrix, which current
    # pandas no longer provides.
    result["full"] = frame.sort_values(["Chr", "ChrPos"])["PValue"].values

    return result, fs_result
Example #14
0
def execute_lmm(test_snps, pheno, G0, covar):
    """
    Run single_snp with G0 and covar and collect the sorted p-values.

    :param test_snps: SNPs to test; passed through to single_snp
    :param pheno: phenotype; passed through to single_snp
    :param G0: passed to single_snp as ``G0``
    :param covar: passed to single_snp as ``covar``
    :return: ``(result, fs_result)``; ``result["full"]`` is the PValue array
        sorted by Chr and ChrPos, ``fs_result`` is always an empty dict
    """

    result = {}
    fs_result = {}

    frame = single_snp(test_snps, pheno, G0=G0, covar=covar)
    # sort_values/.values replace DataFrame.sort/as_matrix, which current
    # pandas no longer provides.
    result["full"] = frame.sort_values(["Chr", "ChrPos"])["PValue"].values

    return result, fs_result
Example #15
0
    def test_old(self):
        """Run single_snp on self.bed with covariates and count_A1=True.

        The result is handed to compare_files with the "old" reference.
        """
        logging.info("test_old")

        out_path = self.file_name("old")
        options = {
            "covar": self.cov_fn,
            "count_A1": True,
            "output_file_name": out_path,
        }
        frame = single_snp(self.bed, self.phen_fn, **options)
        self.compare_files(frame, "old")
    def test_G0_has_reader(self):
        """Run single_snp twice with the Bed reader supplied as kernel SNPs.

        The first run passes the reader as G0 with mixing=0; the second
        passes an identity kernel as G0, the reader as G1, and mixing=1.
        Both use count_A1=False and each result is handed to compare_files
        with the "one" reference.
        """
        logging.info("TestSingleSnp test_G0_has_reader")
        bed = Bed(self.bedbase, count_A1=False)
        phenotype, covariates = self.phen_fn, self.cov_fn
        out_path = self.file_name("G0_has_reader")

        # Reader given directly as G0, mixing=0.
        via_g0 = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=bed,
            mixing=0,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(via_g0, "one")

        # Identity kernel as G0, reader as G1, mixing=1.
        via_g1 = single_snp(
            test_snps=bed[:, :10],
            pheno=phenotype,
            covar=covariates,
            G0=KernelIdentity(bed.iid),
            G1=bed,
            mixing=1,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(via_g1, "one")
    def test_no_cov(self):
        """Run single_snp without a covar argument.

        The result is handed to compare_files with the "no_cov" reference.
        """
        logging.info("TestSingleSnp test_no_cov")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("no_cov")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            G0=bed,
            mixing=0,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "no_cov")
     def mapper_single_snp_2K_given_chrom(test_chr):
         """Run single_snp with two kernels on the test SNPs of one chromosome.

         K0 is the SNP reader obtained from _K_per_chrom for test_chr; K1 is
         that reader restricted to the sids stored for this chromosome in
         chrom_index_to_best_sid. All other settings come from the enclosing
         scope.
         """
         logging.info("Working on chr={0}".format(test_chr))
         # Test SNPs whose chromosome (first pos column) equals test_chr.
         on_this_chrom = test_snps.pos[:,0]==test_chr
         tested = test_snps[:,on_this_chrom]

         background = _K_per_chrom(G, test_chr, G.iid).snpreader
         selected_sids = chrom_index_to_best_sid[chrom_list.index(test_chr)]
         foreground = background[:,background.sid_to_index(selected_sids)]

         return single_snp(
             test_snps=tested,
             K0=background,
             K1=foreground,
             pheno=pheno,
             covar=covar,
             leave_out_one_chrom=False,
             GB_goal=GB_goal,
             force_full_rank=force_full_rank,
             force_low_rank=force_low_rank,
             mixing=mixing,
             h2=h2,
         )
    def test_gb_goal(self):
        """Run single_snp with GB_goal=0 and GB_goal=.12 (count_A1=False).

        Each run writes to its own output file and its result is handed to
        compare_files with the same "one" reference.
        """
        logging.info("TestSingleSnp test_gb_goal")
        bed = Bed(self.bedbase, count_A1=False)
        phenotype, covariates = self.phen_fn, self.cov_fn

        # (output file tag, GB_goal value) for each run, in order.
        for tag, gb_goal in (("gb_goal", 0), ("gb_goal2", .12)):
            out_path = self.file_name(tag)
            frame = single_snp(
                test_snps=bed[:, :10],
                pheno=phenotype,
                covar=covariates,
                G0=bed,
                mixing=0,
                leave_out_one_chrom=False,
                GB_goal=gb_goal,
                output_file_name=out_path,
                count_A1=False,
            )

            self.compare_files(frame, "one")
Example #20
0
    def test_no_cov(self):
        """Run single_snp without a covar argument.

        The result is handed to compare_files with the "no_cov" reference.
        """
        logging.info("TestSingleSnp test_no_cov")
        from pysnptools.snpreader import Bed
        bed = Bed(self.bedbase)
        out_path = self.file_name("no_cov")

        run_args = {
            "test_snps": bed[:, :10],
            "pheno": self.phen_fn,
            "G0": bed,
            "mixing": 0,
            "output_file_name": out_path,
        }
        frame = single_snp(**run_args)

        self.compare_files(frame, "no_cov")
Example #21
0
     def mapper_single_snp_2K_given_chrom(test_chr):
         """Run single_snp with two kernels on the test SNPs of one chromosome.

         K0 is the SNP reader obtained from _K_per_chrom for test_chr; K1 is
         that reader restricted to the sids stored for this chromosome in
         chrom_index_to_best_sid. All other settings come from the enclosing
         scope.
         """
         logging.info("Working on chr={0}".format(test_chr))
         # Test SNPs whose chromosome (first pos column) equals test_chr.
         on_this_chrom = test_snps.pos[:,0]==test_chr
         tested = test_snps[:,on_this_chrom]

         background = _K_per_chrom(G, test_chr, G.iid).snpreader
         selected_sids = chrom_index_to_best_sid[chrom_list.index(test_chr)]
         foreground = background[:,background.sid_to_index(selected_sids)]

         return single_snp(
             test_snps=tested,
             K0=background,
             K1=foreground,
             pheno=pheno,
             covar=covar,
             leave_out_one_chrom=False,
             GB_goal=GB_goal,
             force_full_rank=force_full_rank,
             force_low_rank=force_low_rank,
             mixing=mixing,
             h2=h2,
         )
    def test_other(self):
        """Run single_snp with the Bed reader passed as K1 and no K0/G0.

        The result is handed to compare_files with the "one" reference.
        """
        logging.info("TestSingleSnp test_other")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("other")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            covar=self.cov_fn,
            K1=bed,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "one")
    def test_interact(self):
        """Run single_snp with interact_with_snp=1.

        The result is handed to compare_files with the "interact" reference.
        """
        logging.info("TestSingleSnp test_interact")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("interact")

        run_args = {
            "test_snps": bed[:, :10],
            "pheno": self.phen_fn,
            "covar": self.cov_fn,
            "G0": bed,
            "mixing": 0,
            "leave_out_one_chrom": False,
            "interact_with_snp": 1,
            "output_file_name": out_path,
            "count_A1": False,
        }
        frame = single_snp(**run_args)

        self.compare_files(frame, "interact")
    def test_mixid(self):
        """Run single_snp with SNP columns 10:100 as G0, an identity K1, mixing=.25.

        The result is handed to compare_files with the "mixid" reference.
        """
        logging.info("TestSingleSnp test_mixid")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("mixid")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=bed[:, 10:100],
            K1=KernelIdentity(bed.iid),
            mixing=.25,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "mixid")
    def test_mixingKs(self):
        """Run single_snp with two SnpKernel objects as K0 and K1 and mixing=None.

        K0 wraps SNP columns 10:100 and K1 wraps columns 100:200, both with
        the Unit standardizer. The result is handed to compare_files with
        the "mixing" reference.
        """
        logging.info("TestSingleSnp test_mixingKs")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("mixingKs")

        first_kernel = SnpKernel(bed[:, 10:100], Unit())
        second_kernel = SnpKernel(bed[:, 100:200], Unit())
        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            covar=self.cov_fn,
            K0=first_kernel,
            K1=second_kernel,
            mixing=None,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "mixing")
    def test_interact_looc(self):
        """Run single_snp on all SNPs with interact_with_snp=0 and mixing=0.

        leave_out_one_chrom is not passed. The result is handed to
        compare_files with the "interact_looc" reference.
        """
        logging.info("TestSingleSnpLeaveOutOneChrom test_interact_looc")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("interact_looc")

        options = {
            "covar": self.cov_fn,
            "mixing": 0,
            "interact_with_snp": 0,
            "output_file_name": out_path,
            "count_A1": False,
        }
        frame = single_snp(bed, self.phen_fn, **options)

        self.compare_files(frame, "interact_looc")
    def test_preload_files(self):
        """Run single_snp with phenotype and covariates loaded up front.

        pheno comes from pstpheno.loadOnePhen and covar from
        pstpheno.loadPhen instead of file names, and G0 is given as the
        bedbase value itself rather than a Bed object. The result is handed
        to compare_files with the "one" reference.
        """
        logging.info("TestSingleSnp test_preload_files")
        bed_path = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        bed = Bed(bed_path, count_A1=False)
        out_path = self.file_name("preload_files")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=loaded_pheno,
            covar=loaded_covar,
            G0=bed_path,
            mixing=0,
            leave_out_one_chrom=False,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "one")
Example #28
0
    def test_noK0(self):
        """Run single_snp with the Bed reader as G1, mixing=1 and no G0/K0.

        The result is handed to compare_files with the "one" reference.
        """
        logging.info("TestSingleSnp test_noK0")
        bed = Bed(self.bedbase)
        out_path = self.file_name("noK0")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G1=bed,
            mixing=1,
            leave_out_one_chrom=False,
            output_file_name=out_path,
        )
        self.compare_files(frame, "one")
    def test_two_looc(self):
        """Run single_snp on every tenth SNP with covariates and no kernel arguments.

        The result is handed to compare_files with the "two_looc" reference.
        """
        logging.info("TestSingleSnpLeaveOutOneChrom test_two_looc")
        bed = Bed(self.bedbase, count_A1=False)
        out_path = self.file_name("two_looc")

        every_tenth = bed[:, ::10]
        frame = single_snp(
            every_tenth,
            self.phen_fn,
            covar=self.cov_fn,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(frame, "two_looc")
Example #30
0
    def test_none(self):
        """Run single_snp with an identity kernel as K0 and mixing=0.

        The result is handed to compare_files with the "none" reference.
        """
        logging.info("TestSingleSnp test_none")
        bed = Bed(self.bedbase)
        out_path = self.file_name("none")

        run_args = {
            "test_snps": bed[:, :10],
            "pheno": self.phen_fn,
            "covar": self.cov_fn,
            "K0": KernelIdentity(bed.iid),
            "mixing": 0,
            "leave_out_one_chrom": False,
            "output_file_name": out_path,
        }
        frame = single_snp(**run_args)

        self.compare_files(frame, "none")
Example #31
0
    def test_one_looc(self):
        """Run single_snp on all SNPs with covariates and mixing=0.

        No kernel arguments and no leave_out_one_chrom are passed. The
        result is handed to compare_files with the "one_looc" reference.
        """
        logging.info("TestSingleSnpLeaveOutOneChrom test_one_looc")
        bed = Bed(self.bedbase)
        out_path = self.file_name("one_looc")

        options = {
            "covar": self.cov_fn,
            "mixing": 0,
            "output_file_name": out_path,
        }
        frame = single_snp(bed, self.phen_fn, **options)

        self.compare_files(frame, "one_looc")
Example #32
0
    def test_mixing(self):
        """Run single_snp with two SNP sets as G0 and G1 and mixing=None.

        G0 is SNP columns 10:100 and G1 is columns 100:200. The result is
        handed to compare_files with the "mixing" reference.
        """
        logging.info("TestSingleSnp test_mixing")
        bed = Bed(self.bedbase)
        out_path = self.file_name("mixing")

        frame = single_snp(
            test_snps=bed[:, :10],
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=bed[:, 10:100],
            G1=bed[:, 100:200],
            mixing=None,
            leave_out_one_chrom=False,
            output_file_name=out_path,
        )
        self.compare_files(frame, "mixing")
Example #33
0
def execute_fs(test_snps, pheno, G0, covar):
    """
    run feature selection on G0, then test test_snps against the selected SNPs

    The phenotype and the covariates are first written as space-separated
    files (no header, no index) whose names share one uuid fragment. The
    names are relative paths, and the files are not removed by this function.

    Returns a pair (result, fs_result):
      * result maps "fs_all" / "fs_all_cov" to the PValue column of the
        single_snp output (without / with covar) after sorting by Chr, ChrPos
      * fs_result maps "result_uncond_all" to whatever Local().run returned
        for the feature-selection job
    """

    id1_col = G0.iid[:, 0]
    id2_col = G0.iid[:, 1]
    tag = str(uuid.uuid4())[0:13]

    # phenotype file: id1, id2, y
    pheno_path = "tmp_pheno_%s.txt" % (tag)
    pheno_frame = pd.DataFrame({"id1": id1_col, "id2": id2_col, "y": pheno["vals"]})
    pheno_frame.to_csv(pheno_path, sep=" ", header=False, index=False)

    # covariate file: id1, id2, then one "pc_<i>" column per covariate column
    cov_vals = covar["vals"]
    columns = [("id1", id1_col), ("id2", id2_col)]
    for i in xrange(cov_vals.shape[1]):
        columns.append(("pc_%i" % i, cov_vals[:, i]))
    cov_path = "tmp_cov_%s.txt" % (tag)
    pd.DataFrame.from_items(columns).to_csv(cov_path, sep=" ", header=False, index=False)

    # fs unconditioned
    #TODO: fix include_all!!
    job = create_feature_selection_distributable(G0, pheno_path, None, 0, "fs_out", include_all=False, cov_fn=cov_path)
    uncond = Local().run(job)
    fs_result = {"result_uncond_all": uncond}

    # the run yields four values; only the selected SNP ids are used here
    _, _, _, best_snps = uncond
    selected = G0[:, argintersect_left(G0.sid, best_snps)]

    def sorted_pvalues(frame):
        # p-values ordered by chromosome and position
        return frame.sort(["Chr", "ChrPos"])["PValue"].as_matrix()

    result = {
        "fs_all": sorted_pvalues(single_snp(test_snps, pheno, G0=selected)),
        "fs_all_cov": sorted_pvalues(single_snp(test_snps, pheno, G0=selected, covar=covar)),
    }

    return result, fs_result
    def test_unknown_sid(self):
        """Check that single_snp raises when sid_list contains 'bogus sid'.

        The call is expected to fail; the test fails if it returns normally.
        Only Exception subclasses count as the expected failure, so that
        KeyboardInterrupt and SystemExit still propagate instead of being
        mistaken for a pass.
        """
        logging.info("TestSingleSnp test_unknown_sid")

        test_snps = Bed(self.bedbase, count_A1=False)
        pheno = self.phen_fn
        covar = self.cov_fn

        try:
            single_snp(test_snps=test_snps,G0=test_snps,pheno=pheno,leave_out_one_chrom=False,mixing=0,covar=covar,sid_list=['1_4','bogus sid','1_9'],count_A1=False)
        except Exception:
            failed = True
        else:
            failed = False

        assert failed, "single_snp did not raise for an unknown sid in sid_list"
Example #35
0
    def test_two_looc(self):
        """Call single_snp on every 10th SNP of the bed file.

        Neither mixing nor leave_out_one_chrom is passed. The resulting frame
        is handed to compare_files under the name "two_looc".
        """
        logging.info("TestSingleSnpLeaveOutOneChrom test_two_looc")
        bed = Bed(self.bedbase, count_A1=False)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("two_looc")

        every_tenth = bed[:, ::10]
        result = single_snp(every_tenth, phen_path, covar=cov_path,
                            output_file_name=out_path, count_A1=False)

        self.compare_files(result, "two_looc")
Example #36
0
    def test_G0_has_reader(self):
        """Call single_snp with the whole Bed reader as G0 and mixing=0.

        The first 10 SNPs are tested; the resulting frame is handed to
        compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_G0_has_reader")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        out_path = self.file_name("G0_has_reader")

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=self.phen_fn,
            G0=reader,
            covar=self.cov_fn,
            mixing=0,
            output_file_name=out_path,
        )
        self.compare_files(result, "one")
Example #37
0
    def test_preload_files(self):
        """Call single_snp with phenotype and covariates loaded up front.

        pheno and covar come from pstpheno loaders rather than file names,
        while G0 is given as the bed base path itself. The resulting frame is
        handed to compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_preload_files")
        from pysnptools.snpreader import Bed
        bed_path = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_path)
        out_path = self.file_name("preload_files")

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=loaded_pheno,
            G0=bed_path,
            mixing=0,
            covar=loaded_covar,
            output_file_name=out_path,
        )
        self.compare_files(result, "one")
Example #38
0
    def test_mixing(self):
        """Call single_snp with G0, G1 and mixing=None on the first 10 SNPs.

        G0 is SNP columns 10..99 and G1 is columns 100..199 of the same bed
        file; leave_out_one_chrom is not passed. The resulting frame is handed
        to compare_files under the name "mixing".
        """
        logging.info("TestSingleSnp test_mixing")
        from pysnptools.snpreader import Bed
        snps = Bed(self.bedbase)
        out_path = self.file_name("mixing")

        result = single_snp(
            test_snps=snps[:, :10],
            pheno=self.phen_fn,
            G0=snps[:, 10:100],
            covar=self.cov_fn,
            G1=snps[:, 100:200],
            mixing=None,
            output_file_name=out_path,
        )

        self.compare_files(result, "mixing")
Example #39
0
    def test_no_cov(self):
        """Call single_snp without a covar argument.

        The first 10 SNPs are tested with the whole reader as G0, mixing=0 and
        leave_out_one_chrom=False. The resulting frame is handed to
        compare_files under the name "no_cov".
        """
        logging.info("TestSingleSnp test_no_cov")
        reader = Bed(self.bedbase)
        phen_path = self.phen_fn
        out_path = self.file_name("no_cov")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path, G0=reader,
                            mixing=0, leave_out_one_chrom=False,
                            output_file_name=out_path)

        self.compare_files(result, "no_cov")
Example #40
0
    def test_no_cov(self):
        """Call single_snp without a covar argument.

        The first 10 SNPs are tested with the whole reader as G0 and mixing=0;
        leave_out_one_chrom is not passed. The resulting frame is handed to
        compare_files under the name "no_cov".
        """
        logging.info("TestSingleSnp test_no_cov")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        phen_path = self.phen_fn
        out_path = self.file_name("no_cov")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path, G0=reader,
                            mixing=0, output_file_name=out_path)

        self.compare_files(result, "no_cov")
Example #41
0
            def mapper_gather_lots(i_fold_and_pair):
                """Process one fold: run single_snp on the training rows and score each candidate k.

                i_fold_and_pair is (i_fold, (train_idx, test_idx)). All other inputs
                (G_for_chrom, pheno, covar, k_list, n_folds, max_k, ...) are read from
                the enclosing scope.

                Returns (k_list_in, top_snps, k_index_to_nLL):
                  * k_list_in      -- [0] plus the usable entries of k_list; None when i_fold > 0
                  * top_snps       -- first max_k SNP ids of the single_snp result when is_all, else None
                  * k_index_to_nLL -- one fastlmm.score value per k in k_list_in; None when i_fold == n_folds
                """
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                # Training rows only.
                G_train = G_for_chrom[train_idx,:]

                #Precompute whole x whole standardized on train
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(G_for_chrom, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(GB_goal, G_for_chrom.iid_count, min_count)
                K_whole_unittrain = _SnpWholeWithTrain(whole=G_for_chrom,train_idx=train_idx, standardizer=Unit(), block_size=block_size).read()

                # The kernel must keep the individuals in the same order as G_for_chrom,
                # because train_idx / test_idx are used to index both.
                assert np.array_equal(K_whole_unittrain.iid,G_for_chrom.iid),"real assert"
                K_train = K_whole_unittrain[train_idx]
                    
                single_snp_result = single_snp(test_snps=G_train, K0=K_train, pheno=pheno, #iid intersection means we can give the whole covariate and pheno
                             covar=covar, leave_out_one_chrom=False,
                             GB_goal=GB_goal,  force_full_rank=force_full_rank, force_low_rank=force_low_rank, mixing=mixing, h2=h2)

                # With a single fold every call counts as the "all" pass; otherwise only i_fold == n_folds does.
                is_all = (i_fold == n_folds) if n_folds > 1 else True

                # Always include k=0; keep only positive k values smaller than the number of result rows.
                k_list_in =  [0] + [int(k) for k in k_list if 0 < k and k < len(single_snp_result)]

                if is_all:
                    top_snps = list(single_snp_result.SNP[:max_k])
                else:
                    top_snps = None

                if i_fold == n_folds:
                    # No scoring is done for the i_fold == n_folds pass.
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = []
                    for k in k_list_in:
                        # Columns of the first k SNPs in the single_snp result (all rows).
                        top_k = G_for_chrom[:,G_for_chrom.sid_to_index(single_snp_result.SNP[:k])]
                        logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr,i_fold,k))

                        # k == 0 means no second kernel at all.
                        top_k_train = top_k[train_idx,:] if k > 0 else None
                        fastlmm = FastLMM(force_full_rank=force_full_rank, force_low_rank=force_low_rank,GB_goal=GB_goal)
                        fastlmm.fit(K0_train=K_train, K1_train=top_k_train, X=covar, y=pheno,mixing=mixing,h2raw=h2) #iid intersection means we can give the whole covariate and pheno
    
                        top_k_test = top_k[test_idx,:] if k > 0 else None
                        K0_whole_test = K_whole_unittrain[:,test_idx]
                        nLL = fastlmm.score(K0_whole_test=K0_whole_test,K1_whole_test=top_k_test,X=covar,y=pheno) #iid intersection means we can give the whole covariate and pheno
                        k_index_to_nLL.append(nLL)

                # Only the i_fold == 0 call reports the k list.
                if i_fold > 0:
                    k_list_in = None
    
                return k_list_in, top_snps, k_index_to_nLL
            def mapper_gather_lots(i_fold_and_pair):
                """Process one fold: run single_snp on the training rows and score each candidate k.

                i_fold_and_pair is (i_fold, (train_idx, test_idx)). All other inputs
                (G_for_chrom, pheno, covar, k_list, n_folds, max_k, ...) are read from
                the enclosing scope.

                Returns (k_list_in, top_snps, k_index_to_nLL):
                  * k_list_in      -- [0] plus the usable entries of k_list; None when i_fold > 0
                  * top_snps       -- first max_k SNP ids of the single_snp result when is_all, else None
                  * k_index_to_nLL -- one fastlmm.score value per k in k_list_in; None when i_fold == n_folds
                """
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                # Training rows only.
                G_train = G_for_chrom[train_idx,:]

                #Precompute whole x whole standardized on train
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(G_for_chrom, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(GB_goal, G_for_chrom.iid_count, min_count)
                K_whole_unittrain = _SnpWholeWithTrain(whole=G_for_chrom,train_idx=train_idx, standardizer=Unit(), block_size=block_size).read()

                # The kernel must keep the individuals in the same order as G_for_chrom,
                # because train_idx / test_idx are used to index both.
                assert np.array_equal(K_whole_unittrain.iid,G_for_chrom.iid),"real assert"
                K_train = K_whole_unittrain[train_idx]
                    
                single_snp_result = single_snp(test_snps=G_train, K0=K_train, pheno=pheno, #iid intersection means we can give the whole covariate and pheno
                             covar=covar, leave_out_one_chrom=False,
                             GB_goal=GB_goal,  force_full_rank=force_full_rank, force_low_rank=force_low_rank, mixing=mixing, h2=h2)

                # With a single fold every call counts as the "all" pass; otherwise only i_fold == n_folds does.
                is_all = (i_fold == n_folds) if n_folds > 1 else True

                # Always include k=0; keep only positive k values smaller than the number of result rows.
                k_list_in =  [0] + [int(k) for k in k_list if 0 < k and k < len(single_snp_result)]

                if is_all:
                    top_snps = list(single_snp_result.SNP[:max_k])
                else:
                    top_snps = None

                if i_fold == n_folds:
                    # No scoring is done for the i_fold == n_folds pass.
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = []
                    for k in k_list_in:
                        # Columns of the first k SNPs in the single_snp result (all rows).
                        top_k = G_for_chrom[:,G_for_chrom.sid_to_index(single_snp_result.SNP[:k])]
                        logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr,i_fold,k))

                        # k == 0 means no second kernel at all.
                        top_k_train = top_k[train_idx,:] if k > 0 else None
                        fastlmm = FastLMM(force_full_rank=force_full_rank, force_low_rank=force_low_rank,GB_goal=GB_goal)
                        fastlmm.fit(K0_train=K_train, K1_train=top_k_train, X=covar, y=pheno,mixing=mixing,h2=h2) #iid intersection means we can give the whole covariate and pheno
    
                        top_k_test = top_k[test_idx,:] if k > 0 else None
                        K0_whole_test = K_whole_unittrain[:,test_idx]
                        nLL = fastlmm.score(K0_whole_test=K0_whole_test,K1_whole_test=top_k_test,X=covar,y=pheno) #iid intersection means we can give the whole covariate and pheno
                        k_index_to_nLL.append(nLL)

                # Only the i_fold == 0 call reports the k list.
                if i_fold > 0:
                    k_list_in = None
    
                return k_list_in, top_snps, k_index_to_nLL
    def test_file_cache(self):
        """Make the same single_snp call twice with one shared cache_file.

        Any cache file left from an earlier run is deleted first. After each
        of the two calls the frame is handed to compare_files as "G1".
        """
        logging.info("TestSingleSnp test_file_cache")
        snps = Bed(self.bedbase, count_A1=False)
        phen_path = self.phen_fn
        cov_path = self.cov_fn

        out_path = self.file_name("G1")
        cache_path = self.file_name("cache_file")+".npz"
        if os.path.exists(cache_path):
            os.remove(cache_path)

        # Two identical passes; this test does not delete the cache file in between.
        for _ in range(2):
            result = single_snp(
                test_snps=snps[:, :10],
                pheno=phen_path,
                G0=snps[:, 10:100],
                leave_out_one_chrom=False,
                covar=cov_path,
                G1=snps[:, 100:200],
                mixing=.5,
                output_file_name=out_path,
                cache_file=cache_path,
                count_A1=False,
            )
            self.compare_files(result, "G1")
    def test_G1(self):
        """Call single_snp with G0, G1 and mixing=.5 under three rank settings.

        For each (force_full_rank, force_low_rank) pair the resulting frame is
        handed to compare_files under the same name, "G1".
        """
        logging.info("TestSingleSnp test_G1")
        snps = Bed(self.bedbase, count_A1=False)
        phen_path = self.phen_fn
        cov_path = self.cov_fn
        out_path = self.file_name("G1")

        rank_settings = [(False, True), (False, False), (True, False)]
        for full_rank, low_rank in rank_settings:
            logging.info("{0},{1}".format(full_rank, low_rank))
            result = single_snp(
                test_snps=snps[:, :10],
                pheno=phen_path,
                G0=snps[:, 10:100],
                leave_out_one_chrom=False,
                covar=cov_path,
                G1=snps[:, 100:200],
                mixing=.5,
                force_full_rank=full_rank,
                force_low_rank=low_rank,
                output_file_name=out_path,
                count_A1=False,
            )
            self.compare_files(result, "G1")
    def test_SNC(self):
        """Call single_snp on data in which SNP #2 was overwritten with zeros.

        The bed file is read into memory, column 2 of its values is replaced
        by a constant, and the result is used both as test SNPs (first 10) and
        as G0. The resulting frame is handed to compare_files as "snc".
        """
        logging.info("TestSNC")
        bed_path = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        snc = Bed(bed_path, count_A1=False).read()
        # make SNP #2 have constant values (aka a SNC)
        snc.val[:, 2] = [0] * snc.iid_count

        out_path = self.file_name("snc")

        result = single_snp(
            test_snps=snc[:, :10],
            pheno=loaded_pheno,
            G0=snc,
            mixing=0,
            leave_out_one_chrom=False,
            covar=loaded_covar,
            output_file_name=out_path,
            count_A1=False,
        )
        self.compare_files(result, "snc")
 def test_covar_by_chrom_mixing(self):
     """Call single_snp with both covar and a per-chromosome covar_by_chrom.

     covar is the covariate file re-wrapped as SnpData with the single sid
     "pheno-1"; covar_by_chrom maps chromosomes 1..5 to the covariate file
     name. The frame is handed to compare_files as "covar_by_chrom_mixing".
     """
     logging.info("TestSingleSnpLeaveOutOneChrom test_covar_by_chrom_mixing")
     snps = Bed(self.bedbase, count_A1=False)
     phen_path = self.phen_fn
     cov_path = self.cov_fn
     cov_data = Pheno(cov_path).read()
     covar = SnpData(iid=cov_data.iid, sid=["pheno-1"], val=cov_data.val)
     by_chrom = {}
     for chrom in xrange(1, 6):
         by_chrom[chrom] = cov_path
     out_path = self.file_name("covar_by_chrom_mixing")
     result = single_snp(
         snps,
         phen_path,
         covar=covar,
         covar_by_chrom=by_chrom,
         output_file_name=out_path,
         count_A1=False,
     )
     self.compare_files(result, "covar_by_chrom_mixing")
Example #47
0
    def test_other(self):
        """Call single_snp with the whole reader given as K1 and no K0/G0.

        mixing is not passed. The first 10 SNPs are tested and the resulting
        frame is handed to compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_other")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("other")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path,
                            leave_out_one_chrom=False, K1=reader,
                            covar=cov_path, output_file_name=out_path)

        self.compare_files(result, "one")
Example #48
0
    def test_interact_looc(self):
        """Call single_snp on every SNP with mixing=0 and interact_with_snp=0.

        leave_out_one_chrom is not passed. The resulting frame is handed to
        compare_files under the name "interact_looc".
        """
        logging.info("TestSingleSnpLeaveOutOneChrom test_interact_looc")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("interact_looc")

        result = single_snp(reader, phen_path, covar=cov_path, mixing=0,
                            interact_with_snp=0, output_file_name=out_path)

        self.compare_files(result, "interact_looc")
Example #49
0
    def test_old_one(self):
        """Call single_snp on the chromosome-3 SNPs with the full bed as K0.

        The resulting frame is handed to compare_files under the name
        "old_one".
        """
        logging.info("test_old_one")

        out_path = self.file_name("old_one")

        # Test only on chromosome 3
        on_chrom3 = self.bed.pos[:, 0] == 3
        chrom3_snps = self.bed[:, on_chrom3]

        frame = single_snp(test_snps=chrom3_snps, K0=self.bed,
                           pheno=self.phen_fn, covar=self.cov_fn,
                           count_A1=True, output_file_name=out_path)
        self.compare_files(frame, "old_one")
Example #50
0
    def test_none(self):
        """Call single_snp on the first 10 SNPs with a KernelIdentity as K0.

        The resulting frame is handed to compare_files under the name "none".
        """
        logging.info("TestSingleSnp test_none")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("none")

        first_ten = reader[:, :10]
        identity = KernelIdentity(reader.iid)
        result = single_snp(test_snps=first_ten, pheno=phen_path, mixing=0,
                            leave_out_one_chrom=False, K0=identity,
                            covar=cov_path, output_file_name=out_path)

        self.compare_files(result, "none")
Example #51
0
 def test_covar_by_chrom_mixing(self):
     """Call single_snp with both covar and a per-chromosome covar_by_chrom.

     covar is the covariate file re-wrapped as SnpData with the single sid
     "pheno-1"; covar_by_chrom maps chromosomes 1..5 to the covariate file
     name. The frame is handed to compare_files as "covar_by_chrom_mixing".
     """
     logging.info("TestSingleSnpLeaveOutOneChrom test_covar_by_chrom_mixing")
     snps = Bed(self.bedbase)
     phen_path = self.phen_fn
     cov_path = self.cov_fn
     cov_data = Pheno(cov_path).read()
     wrapped_covar = SnpData(iid=cov_data.iid, sid=["pheno-1"], val=cov_data.val)
     # Same covariate file for each of chromosomes 1..5.
     by_chrom = dict.fromkeys(xrange(1, 6), cov_path)
     out_path = self.file_name("covar_by_chrom_mixing")
     result = single_snp(snps, phen_path, covar=wrapped_covar,
                         covar_by_chrom=by_chrom, output_file_name=out_path)
     self.compare_files(result, "covar_by_chrom_mixing")
Example #52
0
    def test_G0_has_reader(self):
        """Call single_snp with the whole Bed reader as G0 and mixing=0.

        The first 10 SNPs are tested; the resulting frame is handed to
        compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_G0_has_reader")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("G0_has_reader")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path, G0=reader,
                            covar=cov_path, mixing=0,
                            output_file_name=out_path)
        self.compare_files(result, "one")
Example #53
0
    def test_preload_files(self):
        """Call single_snp with phenotype and covariates loaded up front.

        pheno and covar come from pstpheno loaders rather than file names,
        while G0 is given as the bed base path itself. The resulting frame is
        handed to compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_preload_files")
        bed_path = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_path)
        out_path = self.file_name("preload_files")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=loaded_pheno,
                            G0=bed_path, mixing=0, leave_out_one_chrom=False,
                            covar=loaded_covar, output_file_name=out_path)
        self.compare_files(result, "one")
Example #54
0
    def test_noK0(self):
        """Call single_snp with only G1 supplied (no K0/G0) and mixing=1.

        The first 10 SNPs are tested; the resulting frame is handed to
        compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_noK0")
        reader = Bed(self.bedbase, count_A1=False)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("noK0")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path, mixing=1,
                            leave_out_one_chrom=False, G1=reader,
                            covar=cov_path, output_file_name=out_path,
                            count_A1=False)

        self.compare_files(result, "one")
Example #55
0
    def test_G1(self):
        """Call single_snp with G0, G1 and mixing=.5 on the first 10 SNPs.

        G0 is SNP columns 10..99 and G1 is columns 100..199 of the same bed
        file. The resulting frame is handed to compare_files as "G1".
        """
        logging.info("TestSingleSnp test_G1")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("G1")

        tested = reader[:, :10]
        g0_snps = reader[:, 10:100]
        g1_snps = reader[:, 100:200]
        result = single_snp(test_snps=tested, pheno=phen_path, G0=g0_snps,
                            covar=cov_path, G1=g1_snps, mixing=.5,
                            output_file_name=out_path)

        self.compare_files(result, "G1")
Example #56
0
    def test_mixingKs(self):
        """Call single_snp with K0 and K1 given as SnpKernel objects, mixing=None.

        K0 wraps SNP columns 10..99 and K1 wraps columns 100..199, each with a
        Unit standardizer. The resulting frame is handed to compare_files
        under the name "mixing".
        """
        logging.info("TestSingleSnp test_mixingKs")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("mixingKs")

        tested = reader[:, :10]
        kernel0 = SnpKernel(reader[:, 10:100], Unit())
        kernel1 = SnpKernel(reader[:, 100:200], Unit())
        result = single_snp(test_snps=tested, pheno=phen_path, K0=kernel0,
                            leave_out_one_chrom=False, covar=cov_path,
                            K1=kernel1, mixing=None,
                            output_file_name=out_path)

        self.compare_files(result, "mixing")
Example #57
0
    def test_mixid(self):
        """Call single_snp with G0 SNPs, a KernelIdentity as K1 and mixing=.25.

        The first 10 SNPs are tested; the resulting frame is handed to
        compare_files under the name "mixid".
        """
        logging.info("TestSingleSnp test_mixid")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("mixid")

        tested = reader[:, :10]
        g0_snps = reader[:, 10:100]
        identity = KernelIdentity(reader.iid)
        result = single_snp(test_snps=tested, pheno=phen_path, G0=g0_snps,
                            leave_out_one_chrom=False, covar=cov_path,
                            K1=identity, mixing=.25,
                            output_file_name=out_path)

        self.compare_files(result, "mixid")
Example #58
0
    def test_interact(self):
        """Call single_snp on the first 10 SNPs with interact_with_snp=1.

        The whole reader is G0 and mixing is 0. The resulting frame is handed
        to compare_files under the name "interact".
        """
        logging.info("TestSingleSnp test_interact")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("interact")

        first_ten = reader[:, :10]
        result = single_snp(test_snps=first_ten, pheno=phen_path, mixing=0,
                            leave_out_one_chrom=False, G0=reader,
                            covar=cov_path, interact_with_snp=1,
                            output_file_name=out_path)

        self.compare_files(result, "interact")
Example #59
0
    def test_G1_mixing(self):
        """Call single_snp with both G0 and G1 supplied but mixing=0.

        G0 is the whole reader and G1 is SNP columns 100..199. The resulting
        frame is handed to compare_files under the name "one".
        """
        logging.info("TestSingleSnp test_G1_mixing")
        reader = Bed(self.bedbase)
        phen_path, cov_path = self.phen_fn, self.cov_fn
        out_path = self.file_name("G1_mixing")

        tested = reader[:, :10]
        g1_snps = reader[:, 100:200]
        result = single_snp(test_snps=tested, pheno=phen_path, G0=reader,
                            leave_out_one_chrom=False, covar=cov_path,
                            G1=g1_snps, mixing=0,
                            output_file_name=out_path)

        self.compare_files(result, "one")
Example #60
0
def epi_reml(pair_snps,
             pheno,
             covar=None,
             kernel_snps=None,
             output_dir='results',
             part_count=33,
             runner=None,
             override=False):
    """
    Run single_snp over every pair of SNP partitions, one result file per pair.

    pair_snps is split into part_count parts with split_on_sids. For every
    (i, j) with i <= j a _Pairs2 object is built from part i (and part j when
    they differ) and passed to single_snp as the test SNPs. Each resulting
    frame is written tab-separated to
    ``<output_dir>/result.<pair index>.<pair count>.tsv``; the first row and a
    running estimate of the total time are printed.

    :param pair_snps: SNPs to partition and pair up.
    :param pheno: passed to single_snp as ``pheno``.
    :param covar: passed to single_snp as ``covar``.
    :param kernel_snps: SNPs the K0 kernel is built from; pair_snps is used
        when this is falsy.
    :param output_dir: directory for the result files; created if missing.
    :param part_count: number of partitions of pair_snps.
    :param runner: passed to single_snp as ``runner``.
    :param override: when true, recompute a pair even if its result file
        already exists; otherwise existing files are skipped.
    """
    from pysnptools.kernelreader import SnpKernel
    from pysnptools.standardizer import Unit
    import datetime
    from fastlmm.association import single_snp

    part_list = list(split_on_sids(pair_snps, part_count))
    # n*(n+1) is always even, so floor division is exact. Using // keeps the
    # count an int under Python 3 as well; with / it becomes a float there and
    # ".0" leaks into the progress output and the result file names.
    part_pair_count = (part_count * part_count + part_count) // 2
    part_pair_index = -1
    print("part_pair_count={0:,}".format(part_pair_count))
    K0 = SnpKernel(kernel_snps or pair_snps,
                   standardizer=Unit()).read()  #Precompute the similarity
    if not os.path.exists(output_dir): os.makedirs(output_dir)
    start_time = datetime.datetime.now()
    for i in range(part_count):
        part_i = part_list[i]
        for j in range(i, part_count):
            part_pair_index += 1
            # Pairs within one part on the diagonal, across two parts otherwise.
            pairs = _Pairs2(part_i) if i == j else _Pairs2(
                part_i, part_list[j])
            print("Looking at pair {0},{1} which is {2} of {3}".format(
                i, j, part_pair_index, part_pair_count))
            output_file = '{0}/result.{1}.{2}.tsv'.format(
                output_dir, part_pair_index, part_pair_count)
            if override or not os.path.exists(output_file):
                result_df_ij = single_snp(pairs,
                                          K0=K0,
                                          pheno=pheno,
                                          covar=covar,
                                          leave_out_one_chrom=False,
                                          count_A1=True,
                                          runner=runner)
                result_df_ij.to_csv(output_file, sep="\t", index=False)
                print(result_df_ij[:1])
                # Extrapolate the elapsed time over all pairs. Pairs skipped
                # because their file already existed still count as done.
                time_so_far = datetime.datetime.now() - start_time
                total_time_estimate = time_so_far * part_pair_count / (
                    part_pair_index + 1)
                print(total_time_estimate)