def test_two(self):
        '''
        Lock in results on arbitrary data -- because meaningful runs take too long to run.

        Runs heritability_spatial_correction on a 10-row slice of
        self.snpreader_whole, writes the returned frame to a tab-separated
        temp file and compares that file with the stored reference
        "heritability_spatial_correction/two.txt".
        '''
        fn = "two.txt"
        logging.info(fn)
        tmpOutfile = self.file_name(fn)

        snpreader = self.snpreader_whole[:10, :]

        # range instead of the Python 2-only xrange, so this also runs on Python 3.
        spatial_coor = [[i, -i] for i in range(snpreader.iid_count)]
        # Two integer alphas, log-spaced between 100 and 4000.
        alpha_list = [int(v) for v in np.logspace(2, np.log10(4000), 2)]
        dataframe = heritability_spatial_correction(snpreader,
                                                    spatial_coor,
                                                    snpreader.iid,
                                                    alpha_list,
                                                    2,
                                                    self.pheno_whole,
                                                    jackknife_count=2,
                                                    permute_plus_count=1,
                                                    permute_times_count=1,
                                                    just_testing=False)

        dataframe.to_csv(tmpOutfile, sep="\t", index=False)
        referenceOutfile = TestFeatureSelection.reference_file(
            "heritability_spatial_correction/" + fn)
        # NOTE(review): `tolerance` is not defined in this method; it must come
        # from the enclosing module -- confirm.
        out, msg = ut.compare_files(tmpOutfile, referenceOutfile, tolerance)
        self.assertTrue(
            out,
            "msg='{0}', ref='{1}', tmp='{2}'".format(msg, referenceOutfile,
                                                     tmpOutfile))
Ejemplo n.º 2
0
    def test_match_cpp(self):
        r'''
        Check that single_snp p-values agree with the stored reference to within 3.5% (relative).

        The reference is presumably the output of this C++ run:

        match
            FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100

        '''
        # The docstring is raw so that the Windows path above is kept literally
        # ('\b' would otherwise be a backspace and '\D', '\w', ... invalid escapes).
        logging.info("TestSingleSnp test_match_cpp")
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        # The reference does not depend on the loop variables, so read it once.
        referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
        reference = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file

        # Try the similarity SNPs in either kernel slot, with an identity kernel in the other.
        for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
            # Both parameterizations (h2=.5 and log_delta=0) are checked against the same reference.
            frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
            frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
            for frame in [frame_h2, frame_log_delta]:
                assert len(frame) == len(reference)
                for _, row in reference.iterrows():
                    sid = row.SNP
                    # The reference column is spelled 'Pvalue'; the frame column is 'PValue'.
                    pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
                    reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
                    assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
Ejemplo n.º 3
0
    def compare_files(self, sid0_list, sid1_list, pvalue_list, ref_base):
        '''
        Compare pairwise p-values with the reference file "epistasis/<ref_base>.txt".

        sid0_list, sid1_list and pvalue_list are parallel sequences: entry i
        describes the pair (sid0_list[i], sid1_list[i]) with p-value
        pvalue_list[i]. Pairs are matched regardless of the order of the two ids.

        Raises AssertionError if the number of pairs differs from the number of
        reference rows or if any p-value differs by 1e-5 or more, and KeyError
        if a reference pair is absent from the inputs.
        '''
        # Function-scope import: numpy.loadtxt replaces the deprecated SciPy
        # alias sp.loadtxt without relying on the file's import list.
        import numpy as np

        def pair_key(a, b):
            # Order-independent key for an unordered pair of ids.
            return (a, b) if a < b else (b, a)

        reffile = TestFeatureSelection.reference_file("epistasis/" + ref_base +
                                                      ".txt")

        pair_to_pvalue = {}
        for index, sid0 in enumerate(sid0_list):
            pair_to_pvalue[pair_key(sid0, sid1_list[index])] = pvalue_list[index]

        # skiprows=1 drops the header line; ndmin=2 keeps a one-row file
        # two-dimensional so that iterating still yields whole rows.
        reference = np.loadtxt(reffile,
                               dtype='str',
                               comments=None,
                               skiprows=1,
                               ndmin=2)
        assert len(pvalue_list) == len(
            reference), "# of pairs differs from file '{0}'".format(reffile)
        for row in reference:
            # Columns 0 and 4 hold the two ids, column 8 the reference p-value.
            key = pair_key(row[0], row[4])

            assert abs(
                float(row[8]) - pair_to_pvalue[key]
            ) < 1e-5, "pair {0} differs too much from file '{1}'".format(
                key, reffile)
Ejemplo n.º 4
0
    def compare_files(self, frame, ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp/<ref_base>.txt".

        Every SNP listed in the reference is looked up in frame by its 'SNP'
        column and the two p-values must agree to within 1e-5.

        Raises AssertionError if the row counts differ, Exception if a p-value
        differs too much or the difference is NaN, and IndexError if a
        reference SNP is absent from frame.
        '''
        reffile = TestFeatureSelection.reference_file("single_snp/" +
                                                      ref_base + ".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference = pd.read_csv(reffile,
                                delimiter=r'\s',
                                comment=None,
                                engine='python')
        assert len(frame) == len(
            reference), "# of pairs differs from file '{0}'".format(reffile)
        for _, row in reference.iterrows():
            sid = row.SNP
            pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
            diff = abs(row.PValue - pvalue)
            # NaN compares False against everything, so it is tested explicitly.
            if diff > 1e-5 or np.isnan(diff):
                raise Exception(
                    "pair {0} differs too much from file '{1}'".format(
                        sid, reffile))
Ejemplo n.º 5
0
    def compare_files(self, frame, ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp_select/<ref_base>.txt".

        The reference may spell its p-value column 'Pvalue' or 'PValue'. Every
        SNP listed in the reference is looked up in frame by its 'SNP' column
        and the two p-values must agree to within 1e-5.

        Raises AssertionError if the row counts differ or a p-value differs too
        much (a NaN difference also fails), and IndexError if a reference SNP is
        absent from frame.
        '''
        reffile = TestFeatureSelection.reference_file("single_snp_select/" +
                                                      ref_base + ".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference = pd.read_csv(reffile,
                                delimiter=r'\s',
                                comment=None,
                                engine='python')
        if 'Pvalue' in reference.columns:
            # add a new column with different capitalization if it is there
            reference['PValue'] = reference.Pvalue

        assert len(frame) == len(
            reference), "# of pairs differs from file '{0}'".format(reffile)
        for _, row in reference.iterrows():
            sid = row.SNP
            pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
            assert abs(
                row.PValue - pvalue
            ) < 1e-5, "pair {0} differs too much from file '{1}'".format(
                sid, reffile)
Ejemplo n.º 6
0
 def compare_files(self,answer,ref_base):
     '''
     Compare answer with the reference read from "fastlmm/<ref_base>.dat".

     answer.col and answer.row must equal the reference's col and row, and
     every entry of answer.val must be within 1e-4 of the matching entry of
     the reference's val. Any mismatch raises AssertionError.
     '''
     reffile = TestFeatureSelection.reference_file("fastlmm/"+ref_base+".dat") #Uses same results folder as lmm_train
     reference = Dat(reffile).read()

     cols_match = np.array_equal(answer.col,reference.col)
     assert cols_match, "sid differs. File '{0}'".format(reffile)
     rows_match = np.array_equal(answer.row,reference.row)
     assert rows_match, "iid differs. File '{0}'".format(reffile)

     # Element-by-element check in row-major order, so the first mismatch is the one reported.
     for iid_index in range(reference.row_count):
         for sid_index in range(reference.col_count):
             position = (iid_index,sid_index)
             gap = abs(answer.val[position] - reference.val[position])
             assert gap < 1e-4, "Value at {0},{1} differs too much from file '{2}'".format(iid_index,sid_index,reffile)
Ejemplo n.º 7
0
 def compare_files(self,answer,ref_base):
     '''
     Compare answer with the reference read from "fastlmm/<ref_base>.dat".

     answer.col and answer.row must equal the reference's col and row, and
     every entry of answer.val must be within 1e-4 of the matching entry of
     the reference's val. Any mismatch raises AssertionError.
     '''
     reffile = TestFeatureSelection.reference_file("fastlmm/"+ref_base+".dat") #Uses same results folder as lmm_train
     reference=Dat(reffile).read()
     assert np.array_equal(answer.col,reference.col), "sid differs. File '{0}'".format(reffile)
     assert np.array_equal(answer.row,reference.row), "iid differs. File '{0}'".format(reffile)
     # range instead of the Python 2-only xrange, so this also runs on Python 3.
     for iid_index in range(reference.row_count):
         for sid_index in range(reference.col_count):
             a_v = answer.val[iid_index,sid_index]
             r_v = reference.val[iid_index,sid_index]
             assert abs(a_v - r_v) < 1e-4, "Value at {0},{1} differs too much from file '{2}'".format(iid_index,sid_index,reffile)
Ejemplo n.º 8
0
    def compare_files(self,frame,ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp/<ref_base>.txt".

        Every SNP listed in the reference is looked up in frame by its 'SNP'
        column and the two p-values must agree to within 1e-5.

        Raises AssertionError if the row counts differ or a p-value differs too
        much (a NaN difference also fails), and IndexError if a reference SNP is
        absent from frame.
        '''
        reffile = TestFeatureSelection.reference_file("single_snp/"+ref_base+".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference=pd.read_csv(reffile,delimiter=r'\s',comment=None,engine='python')
        assert len(frame) == len(reference), "# of pairs differs from file '{0}'".format(reffile)
        for _, row in reference.iterrows():
            sid = row.SNP
            pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
            assert abs(row.PValue - pvalue) < 1e-5, "pair {0} differs too much from file '{1}'".format(sid,reffile)
Ejemplo n.º 9
0
    def compare_files(self,frame,ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp/<ref_base>.txt".

        Every SNP listed in the reference is looked up in frame by its 'SNP'
        column and the two p-values must agree to within 1e-5.

        Raises AssertionError if the row counts differ or a p-value differs too
        much (a NaN difference also fails), and IndexError if a reference SNP is
        absent from frame.
        '''
        reffile = TestFeatureSelection.reference_file("single_snp/"+ref_base+".txt") #Results are in single_snp, not single_snp_lin_reg

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference=pd.read_csv(reffile,delimiter=r'\s',comment=None,engine='python')
        assert len(frame) == len(reference), "# of pairs differs from file '{0}'".format(reffile)
        for _, row in reference.iterrows():
            sid = row.SNP
            pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
            assert abs(row.PValue - pvalue) < 1e-5, "pair {0} differs too much from file '{1}'".format(sid,reffile)
Ejemplo n.º 10
0
    def compare_files(self,frame,ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp/<ref_base>.txt".

        Both tables are aligned on their 'SNP' column and the p-values must
        agree to within 1e-5. frame itself is left unmodified.

        Raises AssertionError if the row counts differ, and Exception listing
        the offending SNPs if any p-value differs too much or the difference is
        NaN (for example when a SNP appears in only one of the two tables).
        '''
        reffile = TestFeatureSelection.reference_file("single_snp/"+ref_base+".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference=pd.read_csv(reffile,delimiter=r'\s',comment=None,engine='python')
        assert len(frame) == len(reference), "# of pairs differs from file '{0}'".format(reffile)
        # Index copies rather than using inplace=True, so the caller's frame keeps its 'SNP' column.
        frame_pvalue = frame.set_index('SNP').PValue
        reference_pvalue = reference.set_index('SNP').PValue
        diff = frame_pvalue - reference_pvalue
        # "not (|diff| <= tol)" instead of "|diff| > tol" so that NaN differences are flagged too.
        bad = diff[~(np.abs(diff) <= 1e-5)]
        if len(bad) > 0:
            raise Exception("snps differ too much from file '{0}' at these snps {1}".format(reffile,bad))
    def compare_files(self,frame,ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp_all_plus_select/<ref_base>.txt".

        The reference may spell its p-value column 'Pvalue' or 'PValue'. Every
        SNP listed in the reference is looked up in frame by its 'SNP' column
        and the two p-values must agree to within 1e-5.

        Raises AssertionError if the row counts differ or a p-value differs too
        much (a NaN difference also fails), and IndexError if a reference SNP is
        absent from frame.
        '''
        reffile = TestFeatureSelection.reference_file("single_snp_all_plus_select/"+ref_base+".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference=pd.read_csv(reffile,delimiter=r'\s',comment=None,engine='python')
        if 'Pvalue' in reference.columns:
            reference['PValue']=reference.Pvalue #add a new column with different capitalization if it is there

        assert len(frame) == len(reference), "# of pairs differs from file '{0}'".format(reffile)
        for _, row in reference.iterrows():
            sid = row.SNP
            pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
            assert abs(row.PValue - pvalue) < 1e-5, "pair {0} differs too much from file '{1}'".format(sid,reffile)
    def test_two(self):
        '''
        Lock in results on arbitrary data -- because meaningful runs take too long to run.

        Runs heritability_spatial_correction on a 10-row slice of
        self.snpreader_whole, writes the returned frame to a tab-separated
        temp file and compares that file with the stored reference
        "heritability_spatial_correction/two.txt".
        '''
        fn = "two.txt"
        logging.info(fn)
        tmpOutfile = self.file_name(fn)

        snpreader = self.snpreader_whole[:10,:]

        # range instead of the Python 2-only xrange, so this also runs on Python 3.
        spatial_coor = [[i,-i] for i in range(snpreader.iid_count)]
        # Two integer alphas, log-spaced between 100 and 4000.
        alpha_list = [int(v) for v in np.logspace(2,np.log10(4000), 2)]
        dataframe = heritability_spatial_correction(snpreader,spatial_coor,snpreader.iid,alpha_list,self.pheno_whole,jackknife_count=2,permute_plus_count=1,permute_times_count=1,just_testing=False)

        dataframe.to_csv(tmpOutfile,sep="\t",index=False)
        referenceOutfile = TestFeatureSelection.reference_file("heritability_spatial_correction/"+fn)
        # NOTE(review): `tolerance` is not defined in this method; it must come
        # from the enclosing module -- confirm.
        out,msg=ut.compare_files(tmpOutfile, referenceOutfile, tolerance)
        self.assertTrue(out, "msg='{0}', ref='{1}', tmp='{2}'".format(msg, referenceOutfile, tmpOutfile))
Ejemplo n.º 13
0
    def compare_files(self,sid0_list,sid1_list,pvalue_list,ref_base):
        '''
        Compare pairwise p-values with the reference file "epistasis/<ref_base>.txt".

        sid0_list, sid1_list and pvalue_list are parallel sequences: entry i
        describes the pair (sid0_list[i], sid1_list[i]) with p-value
        pvalue_list[i]. Pairs are matched regardless of the order of the two ids.

        Raises AssertionError if the number of pairs differs from the number of
        reference rows or if any p-value differs by 1e-5 or more, and KeyError
        if a reference pair is absent from the inputs.
        '''
        # Function-scope import: numpy.loadtxt replaces the deprecated SciPy
        # alias sp.loadtxt without relying on the file's import list.
        import numpy as np

        def pair_key(a, b):
            # Order-independent key for an unordered pair of ids.
            return (a, b) if a < b else (b, a)

        reffile = TestFeatureSelection.reference_file("epistasis/"+ref_base+".txt")

        pair_to_pvalue = {}
        for index, sid0 in enumerate(sid0_list):
            pair_to_pvalue[pair_key(sid0, sid1_list[index])] = pvalue_list[index]

        # skiprows=1 drops the header line; ndmin=2 keeps a one-row file
        # two-dimensional so that iterating still yields whole rows.
        reference=np.loadtxt(reffile,dtype='str',comments=None,skiprows=1,ndmin=2)
        assert len(pvalue_list) == len(reference), "# of pairs differs from file '{0}'".format(reffile)
        for row in reference:
            # Columns 0 and 4 hold the two ids, column 8 the reference p-value.
            key = pair_key(row[0], row[4])

            assert abs(float(row[8])-pair_to_pvalue[key]) < 1e-5, "pair {0} differs too much from file '{1}'".format(key,reffile)
Ejemplo n.º 14
0
    def test_match_cpp(self):
        r'''
        Check that epistasis p-values agree with the stored reference to within 3.5% (relative).

        The reference is presumably the output of this C++ run:

        match
            FaSTLMM.207\Data\DemoData>fastlmmc -snpPairs -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.pairs.txt -logDelta 0 -verbose 100

        '''
        # The docstring is raw so that the backslashes in the Windows path are kept literally.
        logging.info("TestEpistasis test_match_cpp")
        from pysnptools.snpreader import Bed
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"))
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        frame = epistasis(snps[:,test_idx], pheno,covar=covar, G0 = snps[:,sim_idx],log_delta=0)
        sid0,sid1,pvalue_list =np.array(frame['SNP0']),np.array(frame['SNP1']),np.array(frame['PValue'])

        referenceOutfile = TestFeatureSelection.reference_file("epistasis/topsnps.pairs.txt")

        import pandas as pd
        table = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
        assert len(pvalue_list) == len(table)
        for row in table.iterrows():
            # Each reference row has five fields; only the first three are used.
            snp0cpp,snp1cpp,pvaluecpp,i1,i2 = row[1]
            # sid0, sid1 and pvalue_list all come from `frame`, so they have equal length.
            for snp0py, snp1py, pvaluepy in zip(sid0, sid1, pvalue_list):
                # A pair matches in either order.
                if (snp0py == snp0cpp and snp1py == snp1cpp) or (snp0py == snp1cpp and snp1py == snp0cpp):
                    diff = abs(pvaluecpp - pvaluepy)/pvaluecpp
                    assert diff < .035, "'{0}' '{1}' pvalue_list differ too much {4} -- {2} vs {3}".format(snp0cpp,snp1cpp,pvaluecpp,pvaluepy,diff)
                    break
            else:
                # No computed pair corresponds to this reference row.
                raise AssertionError("pair '{0}' '{1}' from file '{2}' not found in results".format(snp0cpp,snp1cpp,referenceOutfile))
Ejemplo n.º 15
0
    def compare_files(self, frame, ref_base):
        '''
        Compare frame's PValue column with the reference file "single_snp/<ref_base>.txt".

        Both tables are aligned on their 'SNP' column and the p-values must
        agree to within 1e-5. frame itself is left unmodified.

        Raises AssertionError if the row counts differ, and Exception listing
        the offending SNPs if any p-value differs too much or the difference is
        NaN (for example when a SNP appears in only one of the two tables).
        '''
        reffile = TestFeatureSelection.reference_file("single_snp/" +
                                                      ref_base + ".txt")

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        # The regex (one whitespace character) is unchanged.
        reference = pd.read_csv(reffile,
                                delimiter=r'\s',
                                comment=None,
                                engine='python')
        assert len(frame) == len(
            reference), "# of pairs differs from file '{0}'".format(reffile)
        # Index copies rather than using inplace=True, so the caller's frame
        # keeps its 'SNP' column.
        frame_pvalue = frame.set_index('SNP').PValue
        reference_pvalue = reference.set_index('SNP').PValue
        diff = frame_pvalue - reference_pvalue
        # "not (|diff| <= tol)" instead of "|diff| > tol" so that NaN
        # differences are flagged too.
        bad = diff[~(np.abs(diff) <= 1e-5)]
        if len(bad) > 0:
            raise Exception(
                "snps differ too much from file '{0}' at these snps {1}".
                format(reffile, bad))
Ejemplo n.º 16
0
    def test_match_cpp(self):
        r'''
        Check that single_snp p-values agree with the stored reference to within 3.5% (relative).

        The reference is presumably the output of this C++ run:

        match
            FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100

        '''
        # The docstring is raw so that the Windows path above is kept literally
        # ('\b' would otherwise be a backspace and '\D', '\w', ... invalid escapes).
        logging.info("TestSingleSnp test_match_cpp")
        snps = Bed(
            os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"))
        pheno = os.path.join(self.pythonpath,
                             "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath,
                             "tests/datasets/selecttest/covariate.txt")
        sim_sid = [
            "snp26250_m0_.19m1_.19", "snp82500_m0_.28m1_.28",
            "snp63751_m0_.23m1_.23", "snp48753_m0_.4m1_.4",
            "snp45001_m0_.26m1_.26", "snp52500_m0_.05m1_.05",
            "snp75002_m0_.39m1_.39", "snp41253_m0_.07m1_.07",
            "snp11253_m0_.2m1_.2", "snp86250_m0_.33m1_.33",
            "snp3753_m0_.23m1_.23", "snp75003_m0_.32m1_.32",
            "snp30002_m0_.25m1_.25", "snp26252_m0_.19m1_.19",
            "snp67501_m0_.15m1_.15", "snp63750_m0_.28m1_.28",
            "snp30001_m0_.28m1_.28", "snp52502_m0_.35m1_.35",
            "snp33752_m0_.31m1_.31", "snp37503_m0_.37m1_.37",
            "snp15002_m0_.11m1_.11", "snp3751_m0_.34m1_.34",
            "snp7502_m0_.18m1_.18", "snp52503_m0_.3m1_.3",
            "snp30000_m0_.39m1_.39", "isnp4457_m0_.11m1_.11",
            "isnp23145_m0_.2m1_.2", "snp60001_m0_.39m1_.39",
            "snp33753_m0_.16m1_.16", "isnp60813_m0_.2m1_.2",
            "snp82502_m0_.34m1_.34", "snp11252_m0_.13m1_.13"
        ]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = [
            "snp26250_m0_.19m1_.19", "snp63751_m0_.23m1_.23",
            "snp82500_m0_.28m1_.28", "snp48753_m0_.4m1_.4",
            "snp45001_m0_.26m1_.26", "snp52500_m0_.05m1_.05",
            "snp75002_m0_.39m1_.39", "snp41253_m0_.07m1_.07",
            "snp86250_m0_.33m1_.33", "snp15002_m0_.11m1_.11",
            "snp33752_m0_.31m1_.31", "snp26252_m0_.19m1_.19",
            "snp30001_m0_.28m1_.28", "snp11253_m0_.2m1_.2",
            "snp67501_m0_.15m1_.15", "snp3753_m0_.23m1_.23",
            "snp52502_m0_.35m1_.35", "snp30000_m0_.39m1_.39",
            "snp30002_m0_.25m1_.25"
        ]
        test_idx = snps.sid_to_index(test_sid)

        # The reference does not depend on the loop variables, so read it once.
        referenceOutfile = TestFeatureSelection.reference_file(
            "single_snp/topsnps.single.txt")
        reference = pd.read_table(
            referenceOutfile, sep="\t"
        )  # We've manually remove all comments and blank lines from this file

        # Try the similarity SNPs in either kernel slot, with an identity
        # kernel in the other.
        for G0, G1 in [(snps[:, sim_idx], KernelIdentity(snps.iid)),
                       (KernelIdentity(snps.iid), snps[:, sim_idx])]:
            # Both parameterizations (h2=.5 and log_delta=0) are checked
            # against the same reference.
            frame_h2 = single_snp(test_snps=snps[:, test_idx],
                                  pheno=pheno,
                                  G0=G0,
                                  G1=G1,
                                  covar=covar,
                                  h2=.5,
                                  leave_out_one_chrom=False)
            frame_log_delta = single_snp(test_snps=snps[:, test_idx],
                                         pheno=pheno,
                                         G0=G0,
                                         G1=G1,
                                         covar=covar,
                                         log_delta=0,
                                         leave_out_one_chrom=False)
            for frame in [frame_h2, frame_log_delta]:
                assert len(frame) == len(reference)
                for _, row in reference.iterrows():
                    sid = row.SNP
                    # The reference column is spelled 'Pvalue'; the frame
                    # column is 'PValue'.
                    pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
                    reldiff = abs(row.Pvalue - pvalue) / row.Pvalue
                    assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(
                        sid, None, row.Pvalue, pvalue, reldiff)