def testBoundaryChr10(self):
    # The chromosome is selected by name ('chr10'); lookups are then made
    # with the numeric chromosome 10 against a 1-3 Mb window.
    BoundaryCheck.set_chrom('chr10')
    bounds = BoundaryCheck(mb=[1, 3])
    self.assertTrue(bounds.valid)
    self.assertFalse(bounds.NoExclusions())

    # Both window edges and an interior position are accepted.
    for inside in (1000000, 1200000, 3000000):
        self.assertTrue(bounds.TestBoundary(10, inside, ""))

    # One base past either edge, or another chromosome, is rejected.
    for chrom, pos in ((10, 3000001), (10, 999999), (1, 1000500)):
        self.assertFalse(bounds.TestBoundary(chrom, pos, ""))
def testBoundaryInitBPWithInclusions(self):
    # A base-pair window on chromosome 1 combined with two SNPs loaded
    # through LoadSNPs().
    BoundaryCheck.chrom = 1
    bounds = BoundaryCheck(bp=[10000, 500000])
    bounds.LoadSNPs(["rs12345", "rs23456"])
    self.assertFalse(bounds.NoExclusions())
    self.assertTrue(bounds.valid)

    # (expected, chromosome, position) for lookups without an rsid.
    positional_cases = (
        (False, 1, 500),
        (True, 1, 10000),
        (True, 1, 500000),
        (True, 1, 250000),
        (False, 2, 250000),
        (False, 10, 10000),
    )
    for expected, chrom, pos in positional_cases:
        self.assertEqual(expected, bounds.TestBoundary(chrom, pos, ""))

    # The loaded rsids pass even at positions outside the window; an
    # rsid that was not loaded does not.
    self.assertTrue(bounds.TestBoundary(1, 1000000, "rs12345"))
    self.assertTrue(bounds.TestBoundary(1, 1200000, "rs23456"))
    self.assertFalse(bounds.TestBoundary(1, 1200011, "rs345678"))
def testTpedAnalysis(self):
    # Runs the analysis over the TPED fixture with a default
    # BoundaryCheck (chromosome set to 1) and checks the first three
    # results against reference values.
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck()
    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename,
                                       self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    # One row per result: p_mvtest, then for each checked coefficient
    # (coefficient index, beta, standard error, p-value).
    reference = (
        (0.0034756155,
         ((1, 0.1134684009, 0.0337649965541, 0.0007779211),
          (3, -0.0033479839, 0.0492050029324, 0.9457525716))),
        (0.57778118,
         ((1, 0.02798537, 0.033790691857, 0.40755865),
          (3, 0.03275892, 0.0475661, 0.49101013))),
        (0.44661276,
         ((1, 0.01663975, 0.03443300, 0.62891811),
          (3, 0.05712017, 0.04783608, 0.232446188))),
    )
    for row, (p_mvtest, coefficients) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        for coef, beta, stderr, pvalue in coefficients:
            self.assertAlmostEqual(beta, result.betas[coef], places=6)
            self.assertAlmostEqual(stderr, result.beta_stderr[coef],
                                   places=6)
            self.assertAlmostEqual(pvalue, result.beta_pvalues[coef],
                                   places=6)
def setUp(self):
    # Fixture inputs; the alleles are assigned before WriteTestFiles()
    # is called.
    self.allele_1 = list("AAACCCGGGTCGTGTATACC")
    self.allele_2 = list("CGTGTATACCAAACCCGGGT")
    self.WriteTestFiles()
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1]

    # Snapshot of the mach parser settings (presumably restored in
    # tearDown, which is not visible here).
    self.chrpos_encoding = mach_parser.Parser.chrpos_encoding
    self.dosage_ext = mach_parser.Parser.dosage_ext
    self.info_ext = mach_parser.Parser.info_ext
    self.encoding = mach_parser.encoding

    # Snapshot of the shared class-level configuration.
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability
    self.compression = DataParser.compressed_pedigree
    self.standardizer = libgwas.standardizer.get_standardizer()

    # Values the tests in this class run with.
    DataParser.boundary = BoundaryCheck()
    DataParser.ind_exclusions = []
    DataParser.compressed_pedigree = True
    libgwas.standardizer.set_standardizer(
        libgwas.standardizer.NoStandardization)
def testBoundariedMiddle(self):
    # Restricts chromosome 4 to a base-pair window and checks that the
    # parser yields only positions inside it, with recessive encoding.
    lower, upper = 30734, 33528
    BoundaryCheck.chrom = 4
    DataParser.boundary = BoundaryCheck(bp=[lower, upper])
    impute_parser.encoding = impute_parser.Encoding.Recessive
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    parser = impute_parser.Parser(self.fam_file,
                                  [self.gen_file, self.gen_file2],
                                  chroms=[3, 4])
    parser.load_family_details(pc)
    parser.load_genotypes()

    cursor = 0
    dropped = 0
    for snp in parser:
        # Advance past fixture positions outside the window, counting
        # each one as dropped.
        while not (lower <= self.positions[cursor] <= upper):
            cursor += 1
            dropped += 1

        self.assertEqual(self.positions[cursor], snp.pos)
        for col, expected in enumerate(self.recessive_encoding[cursor]):
            self.assertAlmostEqual(expected, snp.genotype_data[col],
                                   places=3)
        cursor += 1

    self.assertEqual(12, dropped)
def testBedAnalysisCov(self):
    # Analysis over the BED fixture with sex included as a covariate;
    # the first nine results are compared with reference values.
    PhenoCovar.sex_as_covariate = True
    DataParser.boundary = BoundaryCheck()
    pheno = PhenoCovar()
    parser = bed_parser.Parser(self.nonmissing_fam,
                               self.nonmissing_bim,
                               self.nonmissing_bed)
    parser.load_fam(pheno)
    parser.load_bim(map3=False)
    parser.load_genotypes()

    results = list(mv_esteq.RunAnalysis(parser, pheno))

    # (p_mvtest, lmpv, decimal places used for the lmpv comparison)
    reference = (
        (0.0034238, 0.0143949, 6),
        (0.58495059, 0.65786, 5),
        (0.45178985, 0.83956, 5),
        (0.133661, 0.82169, 5),
        (0.541391, 0.83595, 5),
        (0.035665, 0.94900, 5),
        (0.784660, 0.59324, 5),
        (0.2137434, 0.18069, 5),
        (0.8160148, 0.79734, 5),
    )
    for row, (p_mvtest, lmpv, lmpv_places) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        self.assertAlmostEqual(lmpv, result.lmpv, places=lmpv_places)
def setUp(self):
    # Fixture inputs; the alleles are assigned before WriteTestFiles()
    # is called.
    self.allele_1 = list("AAACCCGGGTCGTGTATACC")
    self.allele_2 = list("CGTGTATACCAAACCCGGGT")
    self.WriteTestFiles()
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1]

    # Snapshot of the impute parser settings (presumably restored in
    # tearDown, which is not visible here).
    self.gen_ext = impute_parser.Parser.gen_ext
    self.encoding = impute_parser.encoding
    self.parser_info_thresh = impute_parser.Parser.info_threshold

    # Snapshot of the shared class-level configuration.
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability
    self.compression = DataParser.compressed_pedigree

    # Values the tests in this class run with.
    DataParser.boundary = BoundaryCheck()
    DataParser.ind_exclusions = []
    DataParser.compressed_pedigree = True
    impute_parser.Parser.info_threshold = 0.0
def testPedBoundaryTPed(self):
    # Limits parsing to chromosome 2 and compares the loci and genotypes
    # the parser yields with rows 4-6 of the raw TPED file.
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows inside a context manager so the file handle
    # is closed deterministically instead of being left to the garbage
    # collector.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    # First pass: locus metadata.
    index = 4
    loci = ped_parser.get_loci()
    for snp in loci:
        self.assertEqual(int(pedigree[index][0]), snp.chr)
        self.assertEqual(int(pedigree[index][3]), snp.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[index],
                               snp.hetero_freq, places=4)
        index += 1
    self.assertEqual(3, ped_parser.locus_count)

    # Second pass: iterate the parser itself and compare genotypes.
    index = 4
    for snp in ped_parser:
        self.assertEqual(int(pedigree[index][0]), snp.chr)
        self.assertEqual(int(pedigree[index][3]), snp.pos)
        self.assertEqual(pedigree[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def setUp(self):
    # Write the fixture files first, then load the PED lines from them.
    self.WriteTestFiles()
    self.ped = get_lines(self.ped_filename)
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

    # Snapshot of the shared class-level configuration (presumably
    # restored in tearDown, which is not visible here).
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability
    # The same setting is stored under two attribute names; both are
    # kept because code outside this method may read either one.
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.sex_as_covariate = PhenoCovar.sex_as_covariate
    self.standardizer = libgwas.standardizer.get_standardizer()

    # Values the tests in this class run with.
    DataParser.boundary = BoundaryCheck()
    DataParser.ind_exclusions = []
    DataParser.ind_inclusions = []
    libgwas.standardizer.set_standardizer(
        libgwas.standardizer.NoStandardization)
def setUp(self):
    self.WriteTestFiles()
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

    # Snapshot of the shared class-level configuration (presumably
    # restored in tearDown, which is not visible here).
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability

    # Faked pheno/covariate "non-missing" mask: twelve True entries.
    self.non_missing = numpy.array([1] * 12) == 1

    # Every test starts from a default boundary.
    DataParser.boundary = BoundaryCheck()
def testBoundary(self):
    # Limits parsing to chromosome 2 and compares each yielded SNP with
    # rows 4-6 of the non-missing map data.
    pc = PhenoCovar()
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    parser = Parser(self.nonmissing, data_field='GT')
    parser.init_subjects(pc)
    parser.load_genotypes()

    row = 4
    for snp in parser:
        # All-True mask: every subject is kept.
        keep_all = numpy.ones(snp.missing_genotypes.shape[0], dtype=bool)
        expected = self.nonmissing_mapdata[row]
        try:
            genodata = snp.get_genotype_data(keep_all)
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[1]), snp.pos)
            self.assertEqual(expected[2], snp.rsid)
            self.assertEqual(self.genotypes[row], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            # A locus rejected for these reasons is skipped, but it
            # still consumes a row.
            pass
        row += 1

    self.assertEqual(7, row)
def testBedAnalysis(self):
    # Runs the analysis over the BED fixture with a default
    # BoundaryCheck (chromosome set to 1) and checks the first nine
    # results against reference values.
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck()
    pheno = PhenoCovar()
    parser = bed_parser.Parser(self.nonmissing_fam,
                               self.nonmissing_bim,
                               self.nonmissing_bed)
    parser.load_fam(pheno)
    parser.load_bim(map3=False)
    parser.load_genotypes()

    results = list(mv_esteq.RunAnalysis(parser, pheno))

    # (p_mvtest, lmpv) per result, compared to six decimal places.
    reference = (
        (0.00347562, 0.00085539),
        (0.5777812, 0.42212155),
        (0.44661276, 0.61386344),
        (0.13555597, 0.59682217),
        (0.54029842, 0.60475964),
        (0.03547514, 0.86663730),
        (0.79249216, 0.67678089),
        (0.20973300, 0.14431260),
        (0.81471528, 0.56378497),
    )
    for row, (p_mvtest, lmpv) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        self.assertAlmostEqual(lmpv, result.lmpv, places=6)
def testTpedBounded(self):
    # With a 2000-3000 bp window on chromosome 1, the first result is
    # expected at position 2000; two results are checked in detail.
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck(bp=[2000, 3000])
    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename,
                                       self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    first = results[0]
    self.assertEqual(1, first.chr)
    self.assertEqual(2000, first.pos)

    # One row per result: p_mvtest, then for each checked coefficient
    # (coefficient index, beta, standard error, p-value).
    reference = (
        (0.57778118,
         ((1, 0.02798537, 0.033790691857, 0.40755865),
          (3, 0.03275892, 0.0475661, 0.49101013))),
        (0.44661276,
         ((1, 0.01663975, 0.03443300, 0.62891811),
          (3, 0.05712017, 0.04783608, 0.232446188))),
    )
    for row, (p_mvtest, coefficients) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        for coef, beta, stderr, pvalue in coefficients:
            self.assertAlmostEqual(beta, result.betas[coef], places=6)
            self.assertAlmostEqual(stderr, result.beta_stderr[coef],
                                   places=6)
            self.assertAlmostEqual(pvalue, result.beta_pvalues[coef],
                                   places=6)
def testBoundaryX(self):
    # Selecting chromosome 'X' by name should store it as 23 and keep
    # the name available as chrom_name.
    BoundaryCheck.set_chrom('X')
    bounds = BoundaryCheck()
    self.assertEqual(23, BoundaryCheck.chrom)
    self.assertEqual('X', BoundaryCheck.chrom_name)

    # Lower-case, upper-case and numeric spellings are all accepted.
    for chrom, pos, rsid in (('x', 100, "rs100"),
                             ('X', 1000, "rs1000"),
                             (23, 1000, "rs1000")):
        self.assertTrue(bounds.TestBoundary(chrom, pos, rsid))

    # A different chromosome is rejected.
    self.assertFalse(bounds.TestBoundary('Y', 100, "rs100"))
def testBoundaryInitKB(self):
    # A window given in kilobases (20-50 kb) on chromosome 5, probed
    # with base-pair positions.
    BoundaryCheck.chrom = 5
    bounds = BoundaryCheck(kb=[20, 50])
    self.assertFalse(bounds.NoExclusions())
    self.assertEqual(True, bounds.valid)

    # (expected, chromosome, position)
    cases = (
        (False, 5, 15000),
        (True, 5, 20000),
        (True, 5, 30000),
        (True, 5, 50000),
        (False, 5, 50001),
        (False, 1, 25000),
        (False, 10, 20000),
    )
    for expected, chrom, pos in cases:
        self.assertEqual(expected, bounds.TestBoundary(chrom, pos, ""))
def testBoundaryExceedPos(self):
    # Tracks the beyond_upper_bound flag after each lookup against a
    # 1-3 Mb window on chromosome 10.
    BoundaryCheck.chrom = 10
    bounds = BoundaryCheck(mb=[1, 3])
    self.assertTrue(bounds.valid)
    self.assertFalse(bounds.NoExclusions())

    def expect(flag, value):
        # Use the same truthiness assertions as a hand-written check.
        if flag:
            self.assertTrue(value)
        else:
            self.assertFalse(value)

    # (position, lookup accepted?, beyond_upper_bound afterwards?)
    # Order matters: the flag is read right after each lookup.
    steps = (
        (100, False, False),
        (1000000, True, False),
        (1200000, True, False),
        (3000000, True, False),
        (3000001, False, True),
        (999999, False, False),
    )
    for pos, accepted, past_upper in steps:
        expect(accepted, bounds.TestBoundary(10, pos, ""))
        expect(past_upper, bounds.beyond_upper_bound)
def testMapFileWithRegionAndSnpExclusion(self):
    # Loads the map file with a 0-10000 bp window on chromosome 2 and
    # inspects the resulting marker list and SNP mask.
    BoundaryCheck.chrom = 2
    DataParser.boundary = BoundaryCheck(bp=[0, 10000])
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()

    self.assertEqual(2, len(parser.markers))
    self.assertEqual(7, len(parser.snp_mask[:, 0]))
    self.assertEqual(2, parser.locus_count)

    # Masks are filters: the mask has 7 entries and 5 of them are set
    # to 1, leaving the 2 loci counted above unmasked.
    self.assertEqual(5, numpy.sum(parser.snp_mask[:, 0]))
    self.assertEqual(0, parser.snp_mask[4, 1])
    self.assertEqual(0, parser.snp_mask[5, 0])
def setUp(self):
    # Fixture with missing genotypes, resolved inside the libgwas
    # package.
    self.missing = "tests/bedfiles/ped_missing"
    self.missing_bed = resource_filename("libgwas", self.missing + ".bed")
    self.missing_bim = resource_filename("libgwas", self.missing + ".bim")
    self.missing_fam = resource_filename("libgwas", self.missing + ".fam")

    # Expected genotype rows, one per locus.
    self.genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    # Fixture without missing genotypes.
    self.nonmissing = "tests/bedfiles/ped_nomiss"
    self.nonmissing_bed = resource_filename("libgwas",
                                            self.nonmissing + ".bed")
    self.nonmissing_bim = resource_filename("libgwas",
                                            self.nonmissing + ".bim")
    self.nonmissing_fam = resource_filename("libgwas",
                                            self.nonmissing + ".fam")

    # Expected genotype rows for the fixture with missing data; the
    # rows have different lengths.
    self.genotypes_w_missing = [
        [0, 1],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    # BIM contents split into fields, used as reference map data.
    self.nonmissing_mapdata = libgwas.get_lines(self.nonmissing_bim,
                                                split=True)
    self.missing_mapdata = libgwas.get_lines(self.missing_bim, split=True)

    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

    # Snapshot of the shared class-level configuration (presumably
    # restored in tearDown, which is not visible here).
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability

    # Every test starts from a default boundary.
    DataParser.boundary = BoundaryCheck()
def testBoundaryInitBP(self):
    # A window given directly in base pairs on chromosome 1.
    BoundaryCheck.chrom = 1
    bounds = BoundaryCheck(bp=[10000, 500000])
    self.assertFalse(bounds.NoExclusions())
    self.assertTrue(bounds.valid)

    # (expected, chromosome, position)
    cases = (
        (False, 1, 500),
        (True, 1, 10000),
        (True, 1, 500000),
        (True, 1, 250000),
        (False, 2, 250000),
        (False, 10, 10000),
    )
    for expected, chrom, pos in cases:
        self.assertEqual(expected, bounds.TestBoundary(chrom, pos, ""))
def testBoundaryInitMB(self):
    # A window given in megabases (1-3 Mb) on chromosome 10, probed with
    # base-pair positions.
    BoundaryCheck.chrom = 10
    bounds = BoundaryCheck(mb=[1, 3])
    self.assertTrue(bounds.valid)
    self.assertFalse(bounds.NoExclusions())

    # Both window edges and an interior position are accepted.
    for inside in (1000000, 1200000, 3000000):
        self.assertTrue(bounds.TestBoundary(10, inside, ""))

    # One base past either edge, or another chromosome, is rejected.
    for chrom, pos in ((10, 3000001), (10, 999999), (1, 1000500)):
        self.assertFalse(bounds.TestBoundary(chrom, pos, ""))
def testDefaultBoundaryInitialization(self):
    # A BoundaryCheck built without any bounds (chromosome set to -1) is
    # still reported as valid. This is just for simplifying command line
    # parsing.
    BoundaryCheck.chrom = -1
    bounds = BoundaryCheck()
    self.assertEqual(True, bounds.valid)

    # At this point it should accept any valid chromosome/position
    # combination ...
    for chrom, pos in ((1, 100), (10, 1000000), (25, 10000), ('Y', 1400)):
        self.assertTrue(bounds.TestBoundary(chrom, pos, ""))

    # ... and reject these chromosome values.
    for chrom, pos in (('FF', 35), (28, 10000)):
        self.assertFalse(bounds.TestBoundary(chrom, pos, ""))

    # We should test that our short circuit functionality works
    self.assertTrue(bounds.NoExclusions())
def testBoundedMiddle(self):
    # With chr:pos encoding and a 1020-1137 bp window on chromosome 2,
    # the parser is expected to yield fixture entries 8 through 12.
    mach_parser.Parser.chrpos_encoding = True
    BoundaryCheck.chrom = 2
    DataParser.boundary = BoundaryCheck(bp=[1020, 1137])
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    parser = mach_parser.Parser([self.gen_file, self.gen_file2])
    parser.load_family_details(pc)
    parser.load_genotypes()

    cursor = 8
    for snp in parser:
        self.assertEqual(self.positions[cursor], snp.pos)
        for col, expected in enumerate(self.dosage_encoding[cursor]):
            self.assertAlmostEqual(expected, snp.genotype_data[col],
                                   places=3)
        cursor += 1

    self.assertEqual(13, cursor)
def testPedBoundary(self):
    # Limits parsing to chromosome 2 and compares each yielded SNP with
    # rows 4-6 of the map file.
    pc = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    parser.load_mapfile()
    parser.load_genotypes(pc)

    map_rows = get_lines(self.map_filename, split=True)

    row = 4
    for snp in parser:
        expected = map_rows[row]
        self.assertEqual(int(expected[0]), snp.chr)
        self.assertEqual(int(expected[3]), snp.pos)
        self.assertEqual(expected[1], snp.rsid)
        self.assertEqual(self.genotypes[row], list(snp.genotype_data))
        row += 1

    self.assertEqual(7, row)
def setUp(self):
    # Both the "missing" and "nonmissing" names point at the same
    # analysis fixture, resolved inside the tests package.
    self.missing = "bedfiles/analysis"
    self.missing_bed = resource_filename("tests", self.missing + ".bed")
    self.missing_bim = resource_filename("tests", self.missing + ".bim")
    self.missing_fam = resource_filename("tests", self.missing + ".fam")

    # Expected genotype rows, one per locus.
    self.genotypes = [
        [2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2],
        [1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1],
        [2, 0, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2],
        [2, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [1, 0, 2, 1, 2, 2, 1, 0, 2, 1, 2, 2],
        [1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2],
        [2, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2],
    ]

    self.nonmissing = "bedfiles/analysis"
    self.nonmissing_bed = resource_filename("tests",
                                            self.nonmissing + ".bed")
    self.nonmissing_bim = resource_filename("tests",
                                            self.nonmissing + ".bim")
    self.nonmissing_fam = resource_filename("tests",
                                            self.nonmissing + ".fam")

    # Expected genotype rows where some calls are -1.
    self.genotypes_w_missing = [
        [2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1],
        [2, -1, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2],
        [2, -1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [1, -1, 2, 1, 2, 2, 1, 0, 2, 1, 2, 2],
        [1, -1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2],
        [2, -1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2],
    ]

    # Snapshot of the shared class-level configuration (presumably
    # restored in tearDown, which is not visible here).
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.standardizer = libgwas.standardizer.get_standardizer()

    # Values the tests in this class run with.
    libgwas.standardizer.set_standardizer(
        libgwas.standardizer.NoStandardization)
    DataParser.boundary = BoundaryCheck()
def testBedBounded(self):
    # With a 2000-3000 bp window on chromosome 1, the first result is
    # expected at position 2000; two results are checked.
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck(bp=[2000, 3000])
    pheno = PhenoCovar()
    parser = bed_parser.Parser(self.nonmissing_fam,
                               self.nonmissing_bim,
                               self.nonmissing_bed)
    parser.load_fam(pheno)
    parser.load_bim(map3=False)
    parser.load_genotypes()

    results = list(mv_esteq.RunAnalysis(parser, pheno))

    first = results[0]
    self.assertEqual(1, first.chr)
    self.assertEqual(2000, first.pos)

    # (p_mvtest, lmpv) per result, compared to six decimal places.
    reference = (
        (0.5777811, 0.4221215),
        (0.4466128, 0.6138634),
    )
    for row, (p_mvtest, lmpv) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        self.assertAlmostEqual(lmpv, result.lmpv, places=6)
def testBoundariedUpper(self):
    # With a 21000-50000 bp window on chromosome 3, the yielded SNPs are
    # compared with the fixture entries from index 6 onward.
    BoundaryCheck.chrom = 3
    DataParser.boundary = BoundaryCheck(bp=[21000, 50000])
    impute_parser.encoding = impute_parser.Encoding.Recessive
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    parser = impute_parser.Parser(self.fam_file,
                                  [self.gen_file, self.gen_file2],
                                  chroms=[3, 4])
    parser.load_family_details(pc)
    parser.load_genotypes()

    cursor = 6
    for snp in parser:
        self.assertEqual(self.positions[cursor], snp.pos)
        for col, expected in enumerate(self.recessive_encoding[cursor]):
            self.assertAlmostEqual(expected, snp.genotype_data[col],
                                   places=3)
        cursor += 1
def testPedBoundaryTPed(self):
    # Limits parsing to chromosome 2 and compares loci and genotype data
    # with rows 4-6 of the TPED file.
    pc = PhenoCovar()
    tped = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    tped.load_tfam(pc)
    tped.load_genotypes()

    tped_rows = get_lines(self.tped_filename, split=True)

    # First pass: locus metadata.
    row = 4
    for locus in tped.get_loci():
        self.assertEqual(int(tped_rows[row][0]), locus.chr)
        self.assertEqual(int(tped_rows[row][3]), locus.pos)
        row += 1
    self.assertEqual(3, tped.locus_count)

    # Second pass: iterate the parser itself and compare genotype data.
    row = 4
    for snp in tped:
        # All-True mask: every subject is kept.
        keep_all = numpy.ones(snp.missing_genotypes.shape[0], dtype=bool)
        expected = tped_rows[row]
        try:
            genodata = snp.get_genotype_data(keep_all)
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertEqual(expected[1], snp.rsid)
            self.assertAlmostEqual(self.hetero_freq_tped[row],
                                   genodata.hetero_freq, places=4)
            self.assertEqual(self.genotypes[row], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            # A locus rejected for these reasons is skipped, but it
            # still consumes a row.
            pass
        row += 1

    self.assertEqual(7, row)
def testTPedAnalysisCov(self):
    # Analysis over the TPED fixture with sex included as a covariate;
    # the first two results are compared with reference values.
    PhenoCovar.sex_as_covariate = True
    DataParser.boundary = BoundaryCheck()
    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename,
                                       self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    # One row per result: p_mvtest, then for each checked coefficient
    # (coefficient index, beta, standard error, p-value). Only
    # coefficient 1 is checked for the second result.
    reference = (
        (0.00342380,
         ((1, 0.11362883, 0.0337610, 0.00076356),
          (3, 0.01911090, 0.10143178, 0.8505542))),
        (0.584950593047,
         ((1, 0.0276543736525, 0.03383588, 0.413751829881),)),
    )
    for row, (p_mvtest, coefficients) in enumerate(reference):
        # Index explicitly so a short result list still raises.
        result = results[row]
        self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
        for coef, beta, stderr, pvalue in coefficients:
            self.assertAlmostEqual(beta, result.betas[coef], places=6)
            self.assertAlmostEqual(stderr, result.beta_stderr[coef],
                                   places=6)
            self.assertAlmostEqual(pvalue, result.beta_pvalues[coef],
                                   places=6)
def testPedBoundaryBed(self):
    # Limits parsing to chromosome 2 and, for every SNP the BED parser
    # yields, compares its metadata and genotypes with the matching row
    # of the non-missing map data (rows 4-6).
    pc = PhenoCovar()
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    ped_parser = bed_parser.Parser(self.nonmissing_fam, self.nonmissing_bim, self.nonmissing_bed)
    ped_parser.load_fam(pc)
    ped_parser.load_bim(map3=False)
    ped_parser.load_genotypes()
    pedigree = self.nonmissing_mapdata
    # index: row of the map data expected for the current SNP
    # valid_loci: number of SNP/phenotype checks that completed without
    # raising TooMuchMissing or InvalidFrequency
    index = 4
    valid_loci = 0
    for snp in ped_parser:
        for y in pc:
            # The non-missing mask returned with the phenotype is used to
            # select the genotype data.
            (pheno, covars, nonmissing) = y.get_variables(snp.missing_genotypes)
            try:
                genodata = snp.get_genotype_data(nonmissing)
                self.assertEqual(int(pedigree[index][0]), snp.chr)
                self.assertEqual(int(pedigree[index][3]), snp.pos)
                self.assertEqual(pedigree[index][1], snp.rsid)
                self.assertEqual(self.genotypes[index], list(genodata.genotypes))
                valid_loci += 1
            except TooMuchMissing as e:
                # Rejected loci are skipped without failing the test.
                pass
            except InvalidFrequency as e:
                pass
        # NOTE(review): advanced once per SNP, matching its use as a
        # map-row index - confirm this nesting level.
        index += 1
    self.assertEqual(3, valid_loci)
    self.assertEqual(7, index)
def testBoundaryInitBPWithExclusions(self):
    # A base-pair window on chromosome 1 combined with three rsids
    # loaded through LoadExclusions().
    BoundaryCheck.chrom = 1
    bounds = BoundaryCheck(bp=[10000, 500000])
    bounds.LoadExclusions(["rs12345", "rs234567", "rs345678"])
    self.assertFalse(bounds.NoExclusions())
    self.assertTrue(bounds.valid)

    def expect(flag, value):
        # Use the same truthiness assertions as a hand-written check.
        if flag:
            self.assertTrue(value)
        else:
            self.assertFalse(value)

    # (expected, chromosome, position, rsid). Excluded rsids are
    # rejected even when their position lies inside the window.
    cases = (
        (False, 1, 500, ""),
        (True, 1, 10000, ""),
        (False, 1, 10010, "rs12345"),
        (True, 1, 24000, "rs9876"),
        (False, 1, 25000, "rs234567"),
        (True, 1, 250000, ""),
        (True, 1, 500000, ""),
        (False, 2, 250000, ""),
        (False, 10, 10000, ""),
    )
    for expected, chrom, pos, rsid in cases:
        expect(expected, bounds.TestBoundary(chrom, pos, rsid))