def setUp(self):
    """Resolve the bed/bim/fam fixtures and snapshot shared parser settings.

    Class-level values on ``BoundaryCheck``, ``DataParser`` and
    ``PhenoCovar`` are copied onto the test instance (presumably so a
    tearDown can restore them), after which ``DataParser.boundary`` is
    replaced with a fresh ``BoundaryCheck``.
    """
    # Fixture paths are looked up through resource_filename("libgwas", ...).
    self.missing = "tests/bedfiles/ped_missing"
    self.missing_bed = resource_filename("libgwas", self.missing + ".bed")
    self.missing_bim = resource_filename("libgwas", self.missing + ".bim")
    self.missing_fam = resource_filename("libgwas", self.missing + ".fam")

    # One row per SNP; one column per individual.
    self.genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    self.nonmissing = "tests/bedfiles/ped_nomiss"
    self.nonmissing_bed = resource_filename("libgwas", self.nonmissing + ".bed")
    self.nonmissing_bim = resource_filename("libgwas", self.nonmissing + ".bim")
    self.nonmissing_fam = resource_filename("libgwas", self.nonmissing + ".fam")

    self.genotypes_w_missing = [
        [0, 1],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    # The .bim files double as the expected chromosome/rsid/position table.
    self.nonmissing_mapdata = libgwas.get_lines(self.nonmissing_bim, split=True)
    self.missing_mapdata = libgwas.get_lines(self.missing_bim, split=True)

    # The same six values repeat for individuals 7-12.
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 1, 2, 2, 1, 1] * 2

    # Snapshot shared state before any test mutates it.
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability

    # Every test starts from a default boundary.
    DataParser.boundary = BoundaryCheck()
def testPedWithMissingMxIndExclusionsToo(self):
    """Parse the TPED with missing calls using two exclusions and ind_miss_tol=0.5.

    Each locus is compared against the matching TPED row and a
    10-column expected genotype row; seven loci must be produced.
    """
    pheno_covar = PhenoCovar()
    DataParser.ind_exclusions = ["2:2", "3:3"]
    DataParser.ind_miss_tol = 0.5  # We should only lose 1

    parser = TransposedPedigreeParser(self.tfam_filename,
                                      self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)
    expected_genotypes = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = tped_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(expected_genotypes[seen - 1],
                         list(locus.genotype_data))
    self.assertEqual(7, seen)
def testCmdLinePedigreeWithLiability(self):
    """Load a PED file carrying a liability column through ``--liability``.

    The pedigree is rewritten with seven leading columns per individual
    (the seventh being the liability value) and then loaded with
    ``MVTestApplication.LoadCmdLine``.  Every locus is checked against
    the map file and ``self.genotypes``.
    """
    # One individual per line: fid iid pat mat sex pheno liability alleles...
    ped_rows = [
        "1 1 0 0 1 0.1 1 A A G T A A G G C T G T T T",
        "2 2 0 0 1 0.4 1 A C G T G G C G T T G G C T",
        "3 3 0 0 2 1.0 1 A A G G A G C C C C G T C T",
        "4 4 0 0 2 0.5 1 A A G G A G C G C T G G T T",
        "5 5 0 0 1 0.9 1 A C G G A A C G C C G G T T",
        "6 6 0 0 1 1.0 1 A A G T A A G G C C G G T T",
        "7 7 0 0 1 0.1 1 A A G T A A G G C T G G T T",
        "8 8 0 0 1 0.4 1 A C G T G G C G T T G G C T",
        "9 9 0 0 2 1.0 1 A A G G A G C C C C G T C T",
        "10 10 0 0 2 0.5 1 A A G G A G C G C T G G T T",
        "11 11 0 0 1 0.9 1 A C G G A A C G C C G G T T",
        "12 12 0 0 1 1.0 1 A A G T A A G G C C G G T T",
    ]
    # The context manager closes the handle even if the write fails.
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    # --file takes the shared prefix of the .ped/.map pair.
    cmds = "--file %s --liability" % (self.ped_filename.split(".")[0])
    app = mvtest.MVTestApplication()
    ped_parser, pc, args = app.LoadCmdLine(cmds.split(" "))

    mapdata = libgwas.get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeNoPheno(self):
    """Parse a PED file that has no phenotype column.

    ``DataParser.has_pheno`` is switched off and the pedigree is
    rewritten with five leading columns per individual.  Every locus
    is checked against the map file and ``self.genotypes``.
    """
    DataParser.has_pheno = False

    # One individual per line: fid iid pat mat sex alleles...
    ped_rows = [
        "1 1 0 0 1 A A G T A A G G C T G T T T",
        "2 2 0 0 1 A C G T G G C G T T G G C T",
        "3 3 0 0 2 A A G G A G C C C C G T C T",
        "4 4 0 0 2 A A G G A G C G C T G G T T",
        "5 5 0 0 1 A C G G A A C G C C G G T T",
        "6 6 0 0 1 A A G T A A G G C C G G T T",
        "7 7 0 0 1 A A G T A A G G C T G G T T",
        "8 8 0 0 1 A C G T G G C G T T G G C T",
        "9 9 0 0 2 A A G G A G C C C C G T C T",
        "10 10 0 0 2 A A G G A G C G C T G G T T",
        "11 11 0 0 1 A C G G A A C G C C G G T T",
        "12 12 0 0 1 A A G T A A G G C C G G T T",
    ]
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    mapdata = get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def setUp(self):
    """Write the fixture files, then snapshot and reset shared parser state.

    Values copied onto ``self`` are presumably restored by a tearDown
    that is not part of this block.
    """
    self.WriteTestFiles()
    self.ped = get_lines(self.ped_filename)

    # The same six values repeat for individuals 7-12.
    self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
    self.sex = [1, 1, 2, 2, 1, 1] * 2

    # Boundary state: remember the current one, then install a default.
    self.chrom = BoundaryCheck.chrom
    self.boundary = DataParser.boundary
    DataParser.boundary = BoundaryCheck()

    # Filtering thresholds.
    self.min_maf = DataParser.min_maf
    self.max_maf = DataParser.max_maf
    self.snp_miss_tol = DataParser.snp_miss_tol
    self.ind_miss_tol = DataParser.ind_miss_tol

    # Start every test with no individual-level filters.
    DataParser.ind_exclusions = []
    DataParser.ind_inclusions = []

    # Column-layout flags and the sex covariate switch.
    self.sex_as_covar = PhenoCovar.sex_as_covariate
    self.has_sex = DataParser.has_sex
    self.has_pheno = DataParser.has_pheno
    self.has_parents = DataParser.has_parents
    self.has_fid = DataParser.has_fid
    self.has_liability = DataParser.has_liability
    self.sex_as_covariate = PhenoCovar.sex_as_covariate

    # Remember the active standardizer, then switch to NoStandardization.
    standardizer_module = libgwas.standardizer
    self.standardizer = standardizer_module.get_standardizer()
    standardizer_module.set_standardizer(standardizer_module.NoStandardization)
def testForInvariant(self):
    """Expect ``InvariantVar`` when the selected phenotype column is constant.

    A tab-delimited phenotype file is written whose ``MSA`` column is
    1.0 for every individual; it is loaded with ``indices=[3]`` and
    the variables are then requested for every SNP/phenotype pair.
    """
    prefix = "__test_pedigree"
    self.pheno_file = "%s_mch.txt" % (prefix)

    # Header plus one individual per line; MSA never varies.
    pheno_rows = [
        "FID\tIID\tBMI\tIBM\tMSA",
        "1\t1\t0.1\t1.0\t1.0",
        "2\t2\t0.2\t0.5\t1.0",
        "3\t3\t0.3\t0.6\t1.0",
        "4\t4\t0.4\t0.5\t1.0",
        "5\t5\t0.5\t1.0\t1.0",
        "6\t6\t0.6\t0.1\t1.0",
        "17\t7\t0.1\t1.0\t1.0",
        "8\t8\t0.2\t0.5\t1.0",
        "9\t9\t0.3\t0.6\t1.0",
        "10\t10\t0.4\t0.5\t1.0",
        "11\t11\t0.5\t1.0\t1.0",
        "12\t12\t0.6\t0.1\t1.0",
    ]
    with open(self.pheno_file, "w") as f:
        f.write("\n".join(pheno_rows))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    with open(self.pheno_file) as f2:
        pc.load_phenofile(f2, indices=[3])

    with self.assertRaises(InvariantVar):
        for snp in ped_parser:
            for y in pc:
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                # Inverting the all-True array marks nobody as missing.
                y.get_variables(numpy.invert(non_missing))
def testPedigreeIndExclusionsMissingIndThresh(self):
    """Combine two explicit exclusions with ind_miss_tol=0.5 on the missing PED.

    Each locus must match the map file and the first nine columns of
    the expected genotype row; seven loci must be produced.
    """
    pheno_covar = PhenoCovar()
    DataParser.ind_exclusions = ["11:11", "12:12"]
    DataParser.ind_miss_tol = 0.5  # We should only lose 1

    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)
    expected_genotypes = [
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
        [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    # A single counter serves both the map rows and the genotype rows.
    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = map_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(expected_genotypes[seen - 1][0:9],
                         list(locus.genotype_data))
    self.assertEqual(7, seen)
def testPedCmdLineMIND2(self):
    """Load the missing PED via ``--ped/--map`` with ``--mind=0.10``.

    Loci whose genotype data can be extracted are compared with the
    expected four-individual rows and with the map file.  Loci that
    raise ``TooMuchMissing``, ``InvalidFrequency`` or
    ``TooMuchMissingpPhenoCovar`` are skipped without advancing the
    comparison index, and five loci must pass.
    """
    cmds = "--ped %s --map %s --mind=0.10" % (self.ped_filename_missing,
                                              self.map_filename)
    app = mvtest.MVTestApplication()
    dataset, pheno_covar, args = app.LoadCmdLine(cmds.split(" "))

    genotypes = [
        [0, 0, 1, 0],
        [1, 0, 0, 1],
        [0, 1, 0, 0],
        [0, 1, 1, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ]
    mapdata = libgwas.get_lines(self.map_filename, split=True)

    index = 0
    # Must exist before the first InvalidFrequency is handled; otherwise
    # the augmented assignment below raises UnboundLocalError.
    skipped = 0
    for snp in dataset:
        # All-True mask: keep every individual.
        snp_filter = numpy.ones(snp.missing_genotypes.shape[0]) == 1
        try:
            genodata = snp.get_genotype_data(snp_filter)
            self.assertEqual(genotypes[index], list(genodata.genotypes))
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            index += 1
        except TooMuchMissing:
            pass
        except InvalidFrequency:
            skipped += 1
        except TooMuchMissingpPhenoCovar:
            pass
    self.assertEqual(5, index)  # Last two are fixed
def testMissingSnpThresh(self):
    """With snp_miss_tol=0.5, exactly one locus must raise TooMuchMissing.

    Loci that yield genotype data are compared with the map file and
    ``self.missing_genotypes``; ``InvalidFrequency`` is tolerated.
    """
    pheno_covar = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1

    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)

    too_much_missing = 0
    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row_no = seen - 1
        for y in pheno_covar:
            _pheno, _covars, nonmissing = y.get_variables(
                locus.missing_genotypes)
            try:
                genodata = locus.get_genotype_data(nonmissing)
                self.assertEqual(int(map_rows[row_no][0]), locus.chr)
                self.assertEqual(int(map_rows[row_no][3]), locus.pos)
                self.assertEqual(map_rows[row_no][1], locus.rsid)
                self.assertEqual(self.missing_genotypes[row_no],
                                 list(genodata.genotypes))
            except TooMuchMissing:
                too_much_missing += 1
            except InvalidFrequency:
                pass

    self.assertEqual(1, too_much_missing)
    self.assertEqual(7, seen)
def testMissingComplete(self):
    """Parse the PED with missing calls under the default thresholds.

    No locus may raise ``TooMuchMissing`` and all seven must yield
    genotype data matching ``self.missing_genotypes`` and the map file.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)

    too_much_missing = 0
    passed = 0
    for row_no, locus in enumerate(parser):
        for y in pheno_covar:
            _pheno, _covars, nonmissing = y.get_variables(
                locus.missing_genotypes)
            try:
                genodata = locus.get_genotype_data(nonmissing)
                self.assertEqual(self.missing_genotypes[row_no],
                                 list(genodata.genotypes))
                self.assertEqual(int(map_rows[row_no][0]), locus.chr)
                self.assertEqual(int(map_rows[row_no][3]), locus.pos)
                self.assertEqual(map_rows[row_no][1], locus.rsid)
                passed += 1
            except TooMuchMissing:
                too_much_missing += 1
            except InvalidFrequency:
                pass

    self.assertEqual(0, too_much_missing)
    self.assertEqual(7, passed)
def testTPedNoParentsPheno(self):
    """Parse a TFAM that has neither parent columns nor a phenotype column.

    The TFAM is rewritten with three columns per individual, and
    ``DataParser.has_parents`` / ``DataParser.has_pheno`` are switched
    off before loading.  Every locus is checked against the TPED rows
    and ``self.genotypes``.
    """
    # One individual per line: fid iid sex
    tfam_rows = [
        "1 1 1",
        "2 2 1",
        "3 3 2",
        "4 4 2",
        "5 5 1",
        "6 6 1",
        "7 7 1",
        "8 8 1",
        "9 9 2",
        "10 10 2",
        "11 11 1",
        "12 12 1",
    ]
    # The context manager closes the handle even if the write fails.
    with open(self.tfam_filename, "w") as f:
        f.write("\n".join(tfam_rows))

    DataParser.has_parents = False
    DataParser.has_pheno = False
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    mapdata = libgwas.get_lines(self.tped_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testCmdLinePedigreeNoFamSexOrParents(self):
    """Load a two-column-header PED with ``--no-parents --no-fid --no-sex``.

    The pedigree is rewritten with only an individual id and a
    phenotype before the alleles, then loaded through
    ``MVTestApplication.LoadCmdLine``.  Every locus is checked against
    the map file and ``self.genotypes``.
    """
    # One individual per line: iid pheno alleles...
    ped_rows = [
        "1 0.1 A A G T A A G G C T G T T T",
        "2 0.4 A C G T G G C G T T G G C T",
        "3 1.0 A A G G A G C C C C G T C T",
        "4 0.5 A A G G A G C G C T G G T T",
        "5 0.9 A C G G A A C G C C G G T T",
        "6 1.0 A A G T A A G G C C G G T T",
        "7 0.1 A A G T A A G G C T G G T T",
        "8 0.4 A C G T G G C G T T G G C T",
        "9 1.0 A A G G A G C C C C G T C T",
        "10 0.5 A A G G A G C G C T G G T T",
        "11 0.9 A C G G A A C G C C G G T T",
        "12 1.0 A A G T A A G G C C G G T T",
    ]
    # The context manager closes the handle even if the write fails.
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    # --file takes the shared prefix of the .ped/.map pair.
    cmds = "--file %s --no-parents --no-fid --no-sex" % (
        self.ped_filename.split(".")[0])
    app = mvtest.MVTestApplication()
    ped_parser, pc, args = app.LoadCmdLine(cmds.split(" "))

    mapdata = libgwas.get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testTPedNoFamIDSex(self):
    """Parse a TFAM that has no family-id column and no sex column.

    The TFAM is rewritten with four columns per individual, and
    ``DataParser.has_fid`` / ``DataParser.has_sex`` are switched off
    before loading.  Every locus is checked against the TPED rows and
    ``self.genotypes``.
    """
    # One individual per line: iid pat mat pheno
    tfam_rows = [
        "1 0 0 0.1",
        "2 0 0 0.4",
        "3 0 0 1.0",
        "4 0 0 0.5",
        "5 0 0 0.9",
        "6 0 0 1.0",
        "7 0 0 0.1",
        "8 0 0 0.4",
        "9 0 0 1.0",
        "10 0 0 0.5",
        "11 0 0 0.9",
        "12 0 0 1.0",
    ]
    # The context manager closes the handle even if the write fails.
    with open(self.tfam_filename, "w") as f:
        f.write("\n".join(tfam_rows))

    DataParser.has_fid = False
    DataParser.has_sex = False
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    mapdata = libgwas.get_lines(self.tped_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testTPedLiability(self):
    """Parse a TFAM that carries a trailing liability column.

    The TFAM is rewritten with seven columns per individual and
    ``DataParser.has_liability`` is switched on before loading.  Every
    locus is checked against the TPED rows and ``self.genotypes``.
    """
    # One individual per line: fid iid pat mat sex pheno liability
    tfam_rows = [
        "1 1 0 0 1 0.1 1",
        "2 2 0 0 1 0.4 1",
        "3 3 0 0 2 1.0 1",
        "4 4 0 0 2 0.5 1",
        "5 5 0 0 1 0.9 1",
        "6 6 0 0 1 1.0 1",
        "7 7 0 0 1 0.1 1",
        "8 8 0 0 1 0.4 1",
        "9 9 0 0 2 1.0 1",
        "10 10 0 0 2 0.5 1",
        "11 11 0 0 1 0.9 1",
        "12 12 0 0 1 1.0 1",
    ]
    # The context manager closes the handle even if the write fails.
    with open(self.tfam_filename, "w") as f:
        f.write("\n".join(tfam_rows))

    DataParser.has_liability = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    mapdata = libgwas.get_lines(self.tped_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Restrict a TPED to rs0005-rs0007 while excluding rs0007.

    Both ``get_loci()`` and direct iteration are compared against the
    TPED rows starting at row 4; iteration must stop at row 6.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2

    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    tped_rows = libgwas.get_lines(self.tped_filename, split=True)
    first_row = 4

    # Pass 1: the locus summaries only carry chromosome and position here.
    for row_no, locus in enumerate(parser.get_loci(), start=first_row):
        self.assertEqual(int(tped_rows[row_no][0]), locus.chr)
        self.assertEqual(int(tped_rows[row_no][3]), locus.pos)

    # Pass 2: full iteration, including rsid and genotypes.
    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = tped_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(6, next_row)
def testPedSnpBoundary2TPed(self):
    """Restrict a TPED to the SNP range rs0005-rs0006.

    ``get_loci()`` is checked for chromosome, position and
    heterozygote frequency, ``locus_count`` must be 2, and direct
    iteration must cover TPED rows 4 and 5.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
    BoundaryCheck.chrom = 2

    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    tped_rows = libgwas.get_lines(self.tped_filename, split=True)
    first_row = 4

    for row_no, locus in enumerate(parser.get_loci(), start=first_row):
        self.assertEqual(int(tped_rows[row_no][0]), locus.chr)
        self.assertEqual(int(tped_rows[row_no][3]), locus.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[row_no],
                               locus.hetero_freq, places=4)
    self.assertEqual(2, parser.locus_count)

    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = tped_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(6, next_row)
def testPedigreeNoFamSexOrParents(self):
    """Parse a PED file that has no family id, sex or parent columns.

    The three corresponding ``DataParser`` flags are switched off and
    the pedigree is rewritten with only an individual id and a
    phenotype before the alleles.  Every locus is checked against the
    map file and ``self.genotypes``.
    """
    DataParser.has_fid = False
    DataParser.has_sex = False
    DataParser.has_parents = False

    # One individual per line: iid pheno alleles...
    ped_rows = [
        "1 0.1 A A G T A A G G C T G T T T",
        "2 0.4 A C G T G G C G T T G G C T",
        "3 1.0 A A G G A G C C C C G T C T",
        "4 0.5 A A G G A G C G C T G G T T",
        "5 0.9 A C G G A A C G C C G G T T",
        "6 1.0 A A G T A A G G C C G G T T",
        "7 0.1 A A G T A A G G C T G G T T",
        "8 0.4 A C G T G G C G T T G G C T",
        "9 1.0 A A G G A G C C C C G T C T",
        "10 0.5 A A G G A G C G C T G G T T",
        "11 0.9 A C G G A A C G C C G G T T",
        "12 1.0 A A G T A A G G C C G G T T",
    ]
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    mapdata = get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedWithMissingMxSnpComplete(self):
    """With snp_miss_tol=0.5 on the missing TPED, one of seven loci is rejected.

    ``locus_count`` and ``get_loci()`` must still report seven loci.
    During iteration six loci must yield genotype data (checked
    against the TPED rows, the expected genotypes and heterozygote
    frequencies) and one must raise ``TooMuchMissing``.
    """
    pheno_covar = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1

    parser = TransposedPedigreeParser(self.tfam_filename,
                                      self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)
    expected_genotypes = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    expected_hetero_freq = [
        0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
    ]

    self.assertEqual(7, parser.locus_count)

    # Pass 1: locus summaries.
    summarised = 0
    for summarised, locus in enumerate(parser.get_loci(), start=1):
        row = tped_rows[summarised - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
    self.assertEqual(7, summarised)

    # Pass 2: full genotype extraction.
    too_much_missing = 0
    passed = 0
    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row_no = seen - 1
        for y in pheno_covar:
            _pheno, _covars, nonmissing = y.get_variables(
                locus.missing_genotypes)
            try:
                genodata = locus.get_genotype_data(nonmissing)
                self.assertEqual(int(tped_rows[row_no][0]), locus.chr)
                self.assertEqual(int(tped_rows[row_no][3]), locus.pos)
                self.assertEqual(tped_rows[row_no][1], locus.rsid)
                self.assertEqual(expected_genotypes[row_no],
                                 list(locus.genotype_data))
                self.assertAlmostEqual(expected_hetero_freq[row_no],
                                       genodata.hetero_freq, places=4)
                passed += 1
            except TooMuchMissing:
                too_much_missing += 1
            except InvalidFrequency:
                pass

    self.assertEqual(1, too_much_missing)
    self.assertEqual(6, passed)
    self.assertEqual(7, seen)
def testPedNegativePositions(self):
    """Parse the PED with ``self.map_miss_filename`` as the map file.

    The loci produced are compared with rows 2 through 6 of the
    regular map file and the matching rows of ``self.genotypes``.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    # Expectations come from the regular map; comparison starts at row 2.
    map_rows = get_lines(self.map_filename, split=True)
    first_row = 2

    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = map_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(7, next_row)
def testCmdLineNoExternalVars(self):
    """Load via ``--file ... --sex`` without external phenotype/covariate files.

    The only covariate label must be ``SEX`` and the only phenotype
    name ``Pheno-1``.  Each phenotype value is compared with
    ``self.dummy_pheno`` and each covariate value with 1.
    """
    file_prefix = self.ped_filename.split(".")[0]
    argv = ("--file %s --sex" % (file_prefix)).split(" ")

    app = mvtest.MVTestApplication()
    parser, pheno_covar, args = app.LoadCmdLine(argv)

    mapdata = libgwas.get_lines(self.map_filename, split=True)

    covariates = pheno_covar.covariate_data
    phenotypes = pheno_covar.phenotype_data

    self.assertEqual(["SEX"], pheno_covar.covariate_labels)
    self.assertEqual(["Pheno-1"], pheno_covar.phenotype_names)

    for i, observed in enumerate(phenotypes[0]):
        self.assertAlmostEqual(self.dummy_pheno[i], observed)
        self.assertAlmostEqual(1, covariates[0][i])
def testPedNegativePositions(self):
    """Parse ``self.misssnp_tped_filename`` and compare from row 2 onward.

    The loci produced are compared with rows 2 through 6 of that same
    TPED file and the matching rows of ``self.genotypes``.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename,
                                      self.misssnp_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)
    first_row = 2

    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = tped_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(7, next_row)
def testWithFewIndividuals(self):
    """Parse the mini pedigree and expect three genotypes per locus."""
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.miniped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)
    expected_genotypes = [
        [0, 1, 0],
        [1, 1, 0],
        [0, 2, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
    ]

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = map_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(expected_genotypes[seen - 1],
                         list(locus.genotype_data))
    self.assertEqual(7, seen)
def testPedComplete(self):
    """Parse the complete TPED/TFAM pair: 12 individuals, 7 loci."""
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    self.assertEqual(12, parser.ind_count)

    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = tped_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[seen - 1], list(locus.genotype_data))
    self.assertEqual(7, seen)
def testPedNegativePosLocalChromMissSNP(self):
    """Limit to chromosome 1, exclude rs0004, and parse with the 'miss' map.

    Exactly one locus is expected; it is compared with row 2 of the
    regular map file and of ``self.genotypes``.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])

    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)
    first_row = 2

    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = map_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(3, next_row)
def testPedigreeIndExclusionsComplete(self):
    """Exclude individuals 11:11 and 12:12 and expect ten genotypes per locus."""
    DataParser.ind_exclusions = ["11:11", "12:12"]

    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = map_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        # Only the first ten columns of the expected row are compared.
        self.assertEqual(self.genotypes[seen - 1][0:10],
                         list(locus.genotype_data))
    self.assertEqual(7, seen)
def testPedBoundary(self):
    """Restrict parsing to chromosome 2 with a default ``BoundaryCheck``.

    The loci produced are compared with map rows 4 through 6 and the
    matching rows of ``self.genotypes``.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)

    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2

    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)
    first_row = 4

    next_row = first_row
    for next_row, locus in enumerate(parser, start=first_row + 1):
        row = map_rows[next_row - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[next_row - 1],
                         list(locus.genotype_data))
    self.assertEqual(7, next_row)
def testCmdLinePhenoWithNames(self):
    """Select phenotype BMI and covariates SEX,AGE by name on the command line.

    The same file (``self.pheno_covar``) is passed for both ``--pheno``
    and ``--covar``.  Labels and per-individual values are compared
    with ``self.phenotype_data`` and ``self.covariate_data``.
    """
    file_prefix = self.ped_filename.split(".")[0]
    cmds = "--file %s --pheno %s --pheno-names=BMI --covar %s --covar-names SEX,AGE" % (
        file_prefix, self.pheno_covar, self.pheno_covar)

    app = mvtest.MVTestApplication()
    parser, pheno_covar, args = app.LoadCmdLine(cmds.split(" "))

    mapdata = libgwas.get_lines(self.map_filename, split=True)

    covariates = pheno_covar.covariate_data
    phenotypes = pheno_covar.phenotype_data

    self.assertEqual(["SEX", "AGE"], pheno_covar.covariate_labels)
    self.assertEqual(["BMI"], pheno_covar.phenotype_names)

    for i, observed in enumerate(phenotypes[0]):
        self.assertAlmostEqual(self.phenotype_data[i], observed)
        self.assertAlmostEqual(self.covariate_data[0][i], covariates[0][i])
        self.assertAlmostEqual(self.covariate_data[1][i], covariates[1][i])
def testPedMultiPheno(self):
    """Load two phenotype columns and check them with one individual masked.

    For every locus, each of the two phenotypes is requested with
    individual 6 flagged as missing; the 11 returned sex covariates
    and phenotype values are compared with the expected tables.
    """
    PhenoCovar.sex_as_covariate = True
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    with open(self.pheno_file) as handle:
        pheno_covar.load_phenofile(handle, indices=[2, 3])

    map_rows = get_lines(self.map_filename, split=True)

    expected_sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
    expected_pheno = [
        [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
        [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
    ]
    self.assertEqual(2, len(pheno_covar.phenotype_data))

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = map_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[seen - 1], list(locus.genotype_data))

        pheno_count = 0
        for pheno_count, y in enumerate(pheno_covar, start=1):
            # Missing-mask with only individual 6 flagged.
            missing_mask = numpy.zeros(len(locus.genotype_data), dtype=bool)
            missing_mask[6] = True
            pheno, covariates, _nonmissing = y.get_variables(missing_mask)

            for i, sex_value in enumerate(expected_sex):
                self.assertEqual(sex_value, covariates[0][i])
                self.assertAlmostEqual(expected_pheno[pheno_count - 1][i],
                                       pheno[i])
        self.assertEqual(2, pheno_count)
    self.assertEqual(7, seen)
def testTPedPhenoComplete(self):
    """Load the TPED/TFAM pair with sex as a covariate.

    Twelve covariate values, twelve phenotype values, one phenotype
    name and seven loci are expected; each locus is compared with the
    TPED rows and ``self.genotypes``.
    """
    PhenoCovar.sex_as_covariate = True
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    self.assertEqual(12, len(pheno_covar.covariate_data[0]))
    self.assertEqual(12, len(pheno_covar.phenotype_data[0]))
    self.assertEqual(1, len(pheno_covar.phenotype_names))

    tped_rows = libgwas.get_lines(self.tped_filename, split=True)
    self.assertEqual(7, parser.locus_count)

    seen = 0
    for seen, locus in enumerate(parser, start=1):
        row = tped_rows[seen - 1]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[seen - 1], list(locus.genotype_data))
    self.assertEqual(7, seen)
def testPedCompleteAlternateIteration(self):
    """Step through the loci with explicit ``next()`` calls instead of a for loop.

    The value returned by ``next(locus)`` is discarded and the same
    object is inspected again, so the test relies on the locus object
    advancing in place.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)

    # Prime the iteration: the first next() yields the first locus.
    locus = next(iter(parser))

    checked = 0
    while True:
        row = map_rows[checked]
        self.assertEqual(int(row[0]), locus.chr)
        self.assertEqual(int(row[3]), locus.pos)
        self.assertEqual(row[1], locus.rsid)
        self.assertEqual(self.genotypes[checked], list(locus.genotype_data))
        checked += 1
        try:
            next(locus)
        except StopIteration:
            break

    self.assertEqual(7, checked)