def testPedigreeNoFamId(self):
    """Load a pedigree whose rows carry no family-ID column.

    Rewrites ``self.ped_filename`` with twelve records that begin at the
    individual ID, sets ``DataParser.has_fid = False`` and checks every
    locus yielded by the parser against the map file and
    ``self.genotypes``.
    """
    DataParser.has_fid = False
    # PED data is one individual per line, so the records are joined with
    # explicit newlines rather than relying on the literal's layout.
    ped_rows = (
        "1 0 0 1 0.1 A A G T A A G G C T G T T T",
        "2 0 0 1 0.4 A C G T G G C G T T G G C T",
        "3 0 0 2 1.0 A A G G A G C C C C G T C T",
        "4 0 0 2 0.5 A A G G A G C G C T G G T T",
        "5 0 0 1 0.9 A C G G A A C G C C G G T T",
        "6 0 0 1 1.0 A A G T A A G G C C G G T T",
        "7 0 0 1 0.1 A A G T A A G G C T G G T T",
        "8 0 0 1 0.4 A C G T G G C G T T G G C T",
        "9 0 0 2 1.0 A A G G A G C C C C G T C T",
        "10 0 0 2 0.5 A A G G A G C G C T G G T T",
        "11 0 0 1 0.9 A C G G A A C G C C G G T T",
        "12 0 0 1 1.0 A A G T A A G G C C G G T T",
    )
    # ``with`` closes the file even if the write raises.
    with open(self.ped_filename, "w") as ped_file:
        ped_file.write("\n".join(ped_rows))

    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Read the reference map through a context manager so the handle is not
    # left for the garbage collector to close.
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeIndExclusionsMissingIndThresh(self):
    """Combine explicit individual exclusions with ``ind_miss_tol``.

    Uses the pedigree with missing calls. For each of the seven loci the
    parser output must equal the first nine values of the matching row in
    ``expected``.
    """
    pheno_covar = PhenoCovar()
    DataParser.ind_exclusions = ["11:11", "12:12"]
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)

    map_rows = get_lines(self.map_filename, split=True)
    expected = [
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
        [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    # A single counter serves both the map rows and the expected genotypes;
    # the two always advance together.
    seen = 0
    for snp in parser:
        row = map_rows[seen]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(expected[seen][:9], list(snp.genotype_data))
        seen += 1
    self.assertEqual(7, seen)
def testPedigreeIndExclusionsMissingComplete(self):
    """Apply individual exclusions to the pedigree that has missing calls.

    With ``DataParser.ind_exclusions`` set to ``"11:11"`` and ``"12:12"``,
    every locus must yield the first ten values of its ``genotypes`` row
    and match the map file on chromosome, position and rsid.
    """
    DataParser.ind_exclusions = ["11:11", "12:12"]
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [-1, -1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
        [-1, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [-1, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [-1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [-1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    index = 0
    for snp in ped_parser:
        self.assertEqual(genotypes[index][0:10], list(snp.genotype_data))
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        index += 1
    self.assertEqual(7, index)
def testPedigreeNoPheno(self):
    """Load a pedigree whose rows carry no phenotype column.

    Rewrites ``self.ped_filename`` with twelve records that go straight
    from the sex column to the alleles, sets ``DataParser.has_pheno =
    False`` and ``PhenoCovar.sex_as_covariate = True``, then checks each
    locus against the map file and ``self.genotypes``.
    """
    DataParser.has_pheno = False
    # PED data is one individual per line, so the records are joined with
    # explicit newlines rather than relying on the literal's layout.
    ped_rows = (
        "1 1 0 0 1 A A G T A A G G C T G T T T",
        "2 2 0 0 1 A C G T G G C G T T G G C T",
        "3 3 0 0 2 A A G G A G C C C C G T C T",
        "4 4 0 0 2 A A G G A G C G C T G G T T",
        "5 5 0 0 1 A C G G A A C G C C G G T T",
        "6 6 0 0 1 A A G T A A G G C C G G T T",
        "7 7 0 0 1 A A G T A A G G C T G G T T",
        "8 8 0 0 1 A C G T G G C G T T G G C T",
        "9 9 0 0 2 A A G G A G C C C C G T C T",
        "10 10 0 0 2 A A G G A G C G C T G G T T",
        "11 11 0 0 1 A C G G A A C G C C G G T T",
        "12 12 0 0 1 A A G T A A G G C C G G T T",
    )
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)
    mapdata = get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeWithLiability(self):
    """Load a pedigree whose rows carry an extra liability column.

    Rewrites ``self.ped_filename`` with twelve records that have one
    additional column after the phenotype, sets
    ``DataParser.has_liability = True`` and checks each locus against the
    map file and ``self.genotypes``.
    """
    DataParser.has_liability = True
    # PED data is one individual per line, so the records are joined with
    # explicit newlines rather than relying on the literal's layout.
    ped_rows = (
        "1 1 0 0 1 0.1 1 A A G T A A G G C T G T T T",
        "2 2 0 0 1 0.4 1 A C G T G G C G T T G G C T",
        "3 3 0 0 2 1.0 1 A A G G A G C C C C G T C T",
        "4 4 0 0 2 0.5 1 A A G G A G C G C T G G T T",
        "5 5 0 0 1 0.9 1 A C G G A A C G C C G G T T",
        "6 6 0 0 1 1.0 1 A A G T A A G G C C G G T T",
        "7 7 0 0 1 0.1 1 A A G T A A G G C T G G T T",
        "8 8 0 0 1 0.4 1 A C G T G G C G T T G G C T",
        "9 9 0 0 2 1.0 1 A A G G A G C C C C G T C T",
        "10 10 0 0 2 0.5 1 A A G G A G C G C T G G T T",
        "11 11 0 0 1 0.9 1 A C G G A A C G C C G G T T",
        "12 12 0 0 1 1.0 1 A A G T A A G G C C G G T T",
    )
    # ``with`` closes the file even if the write raises.
    with open(self.ped_filename, "w") as ped_file:
        ped_file.write("\n".join(ped_rows))

    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Read the reference map through a context manager so the handle is not
    # left for the garbage collector to close.
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeNoFamId(self):
    """Parse a PED file that omits the family-ID column.

    Twelve records starting at the individual ID are written to
    ``self.ped_filename`` with ``DataParser.has_fid`` switched off. All
    seven loci must then agree with the map file and ``self.genotypes``.
    """
    DataParser.has_fid = False
    # One individual per line: the PED layout needs the newline separators
    # spelled out explicitly.
    records = [
        "1 0 0 1 0.1 A A G T A A G G C T G T T T",
        "2 0 0 1 0.4 A C G T G G C G T T G G C T",
        "3 0 0 2 1.0 A A G G A G C C C C G T C T",
        "4 0 0 2 0.5 A A G G A G C G C T G G T T",
        "5 0 0 1 0.9 A C G G A A C G C C G G T T",
        "6 0 0 1 1.0 A A G T A A G G C C G G T T",
        "7 0 0 1 0.1 A A G T A A G G C T G G T T",
        "8 0 0 1 0.4 A C G T G G C G T T G G C T",
        "9 0 0 2 1.0 A A G G A G C C C C G T C T",
        "10 0 0 2 0.5 A A G G A G C G C T G G T T",
        "11 0 0 1 0.9 A C G G A A C G C C G G T T",
        "12 0 0 1 1.0 A A G T A A G G C C G G T T",
    ]
    # Context managers replace the manual open()/close() pair and the bare
    # open() used for reading, so neither handle can leak.
    with open(self.ped_filename, "w") as out:
        out.write("\n".join(records))

    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testForInvariant(self):
    """Expect ``InvariantVar`` when the selected phenotype never varies.

    Writes a tab-separated phenotype file whose last column (``MSA``) is
    1.0 in every row, loads it with ``indices=[3]``, and asserts that
    walking the loci and calling ``get_variables`` raises ``InvariantVar``.
    """
    prefix = "__test_pedigree"
    self.pheno_file = "%s_mch.txt" % (prefix)
    # One record per line; fields are tab-separated.
    pheno_rows = (
        "FID\tIID\tBMI\tIBM\tMSA",
        "1\t1\t0.1\t1.0\t1.0",
        "2\t2\t0.2\t0.5\t1.0",
        "3\t3\t0.3\t0.6\t1.0",
        "4\t4\t0.4\t0.5\t1.0",
        "5\t5\t0.5\t1.0\t1.0",
        "6\t6\t0.6\t0.1\t1.0",
        "17\t7\t0.1\t1.0\t1.0",
        "8\t8\t0.2\t0.5\t1.0",
        "9\t9\t0.3\t0.6\t1.0",
        "10\t10\t0.4\t0.5\t1.0",
        "11\t11\t0.5\t1.0\t1.0",
        "12\t12\t0.6\t0.1\t1.0",
    )
    with open(self.pheno_file, "w") as f:
        f.write("\n".join(pheno_rows))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)
    with open(self.pheno_file) as f2:
        pc.load_phenofile(f2, indices=[3])

    # The never-read ``index`` and ``mapdata`` locals were dropped; only
    # the raised exception matters here.
    with self.assertRaises(InvariantVar):
        for snp in ped_parser:
            for y in pc:
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                y.get_variables(numpy.invert(non_missing))
def testPedigreeNoFamSexOrParents(self):
    """Load a pedigree without family-ID, sex or parent columns.

    Rewrites ``self.ped_filename`` with twelve records of the form
    ``<iid> <phenotype> <alleles...>``, switches off ``has_fid``,
    ``has_sex`` and ``has_parents`` on ``DataParser``, and checks each
    locus against the map file and ``self.genotypes``.
    """
    DataParser.has_fid = False
    DataParser.has_sex = False
    DataParser.has_parents = False
    # PED data is one individual per line, so the records are joined with
    # explicit newlines rather than relying on the literal's layout.
    ped_rows = (
        "1 0.1 A A G T A A G G C T G T T T",
        "2 0.4 A C G T G G C G T T G G C T",
        "3 1.0 A A G G A G C C C C G T C T",
        "4 0.5 A A G G A G C G C T G G T T",
        "5 0.9 A C G G A A C G C C G G T T",
        "6 1.0 A A G T A A G G C C G G T T",
        "7 0.1 A A G T A A G G C T G G T T",
        "8 0.4 A C G T G G C G T T G G C T",
        "9 1.0 A A G G A G C C C C G T C T",
        "10 0.5 A A G G A G C G C T G G T T",
        "11 0.9 A C G G A A C G C C G G T T",
        "12 1.0 A A G T A A G G C C G G T T",
    )
    with open(self.ped_filename, "w") as f:
        f.write("\n".join(ped_rows))

    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)
    mapdata = get_lines(self.map_filename, split=True)

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeIndExclusionsMissingIndThresh(self):
    """Combine individual exclusions with an individual-missingness limit.

    Runs against the pedigree with missing calls, with
    ``DataParser.ind_exclusions`` and ``DataParser.ind_miss_tol`` both set.
    Each locus must yield the first nine values of its ``genotypes`` row.
    """
    pc = PhenoCovar()
    DataParser.ind_exclusions = ["11:11", "12:12"]
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    genotypes = [
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
        [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    index = 0
    map_idx = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[map_idx][0]), snp.chr)
        self.assertEqual(int(mapdata[map_idx][3]), snp.pos)
        self.assertEqual(mapdata[map_idx][1], snp.rsid)
        self.assertEqual(genotypes[index][0:9], list(snp.genotype_data))
        index += 1
        map_idx += 1
    self.assertEqual(7, index)
def testPedigreeNoPheno(self):
    """Parse a PED file that has no phenotype column.

    Twelve records without a phenotype value are written to
    ``self.ped_filename`` with ``DataParser.has_pheno`` switched off and
    ``PhenoCovar.sex_as_covariate`` switched on. All seven loci must agree
    with the map file and ``self.genotypes``.
    """
    DataParser.has_pheno = False
    # One individual per line: the PED layout needs the newline separators
    # spelled out explicitly.
    records = [
        "1 1 0 0 1 A A G T A A G G C T G T T T",
        "2 2 0 0 1 A C G T G G C G T T G G C T",
        "3 3 0 0 2 A A G G A G C C C C G T C T",
        "4 4 0 0 2 A A G G A G C G C T G G T T",
        "5 5 0 0 1 A C G G A A C G C C G G T T",
        "6 6 0 0 1 A A G T A A G G C C G G T T",
        "7 7 0 0 1 A A G T A A G G C T G G T T",
        "8 8 0 0 1 A C G T G G C G T T G G C T",
        "9 9 0 0 2 A A G G A G C C C C G T C T",
        "10 10 0 0 2 A A G G A G C G C T G G T T",
        "11 11 0 0 1 A C G G A A C G C C G G T T",
        "12 12 0 0 1 A A G T A A G G C C G G T T",
    ]
    # Context managers replace the manual open()/close() pair and the bare
    # open() used for reading, so neither handle can leak.
    with open(self.ped_filename, "w") as out:
        out.write("\n".join(records))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedigreeIndExclusionsMissingComplete(self):
    """Check individual exclusions on the pedigree with missing calls.

    ``DataParser.ind_exclusions`` lists ``"11:11"`` and ``"12:12"``. Each
    of the seven loci must produce the first ten entries of the matching
    ``genotypes`` row and agree with the map file.
    """
    DataParser.ind_exclusions = ["11:11", "12:12"]
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # The map file is read inside ``with`` so its handle is closed as soon
    # as the rows have been collected.
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
        [-1, -1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
        [-1, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [-1, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
        [-1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [-1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    index = 0
    for snp in ped_parser:
        self.assertEqual(genotypes[index][0:10], list(snp.genotype_data))
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        index += 1
    self.assertEqual(7, index)
def testMissingComplete(self):
    """Walk the pedigree with missing calls using default tolerances.

    For every locus and phenotype the genotype data is fetched for the
    non-missing individuals. No ``TooMuchMissing`` may be raised and seven
    locus/phenotype pairs must validate.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    locus = 0
    too_sparse = 0
    passed = 0
    for snp in parser:
        for y in pheno_covar:
            # Only the non-missing mask of the returned triple is needed.
            _, _, nonmissing = y.get_variables(snp.missing_genotypes)
            try:
                genodata = snp.get_genotype_data(nonmissing)
                self.assertEqual(self.missing_genotypes[locus],
                                 list(genodata.genotypes))
                row = map_rows[locus]
                self.assertEqual(int(row[0]), snp.chr)
                self.assertEqual(int(row[3]), snp.pos)
                self.assertEqual(row[1], snp.rsid)
                passed += 1
            except TooMuchMissing:
                too_sparse += 1
            except InvalidFrequency:
                pass
        locus += 1
    self.assertEqual(0, too_sparse)
    self.assertEqual(7, passed)
def testMissingSnpThresh(self):
    """Walk the pedigree with missing calls under ``snp_miss_tol = 0.5``.

    Exactly one locus is expected to raise ``TooMuchMissing``; the loop
    must still visit all seven loci.
    """
    pheno_covar = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    locus = 0
    too_sparse = 0
    for snp in parser:
        for y in pheno_covar:
            # Only the non-missing mask of the returned triple is needed.
            _, _, nonmissing = y.get_variables(snp.missing_genotypes)
            try:
                genodata = snp.get_genotype_data(nonmissing)
                row = map_rows[locus]
                self.assertEqual(int(row[0]), snp.chr)
                self.assertEqual(int(row[3]), snp.pos)
                self.assertEqual(row[1], snp.rsid)
                self.assertEqual(self.missing_genotypes[locus],
                                 list(genodata.genotypes))
            except TooMuchMissing:
                too_sparse += 1
            except InvalidFrequency:
                pass
        locus += 1
    self.assertEqual(1, too_sparse)
    self.assertEqual(7, locus)
def testMapFileWithRegionAndSnpExclusion(self):
    """Load the map file restricted to chromosome 2, bp range 0-10000.

    Two markers are expected to remain; the SNP mask keeps all seven rows
    and flags five of them.
    """
    BoundaryCheck.chrom = 2
    DataParser.boundary = BoundaryCheck(bp=[0, 10000])
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()

    self.assertEqual(2, len(parser.markers))
    self.assertEqual(7, len(parser.snp_mask[:, 0]))
    self.assertEqual(2, parser.locus_count)

    # Masks are filters: all 7 entries stay, and the 5 filtered loci are 1.
    self.assertEqual(5, numpy.sum(parser.snp_mask[:, 0]))
    self.assertEqual(0, parser.snp_mask[4, 1])
    self.assertEqual(0, parser.snp_mask[5, 0])
def testMapFileWithSnpBoundary(self):
    """Load the map file restricted to the SNP range rs0001-rs0003.

    Three markers are expected to remain; the SNP mask keeps all seven
    rows and flags four of them.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()

    self.assertEqual(3, len(parser.markers))
    self.assertEqual(7, len(parser.snp_mask))
    self.assertEqual(3, parser.locus_count)

    # Masks are filters: all 7 entries stay, and the 4 filtered loci are 1.
    self.assertEqual(4, numpy.sum(parser.snp_mask[:, 0]))
    self.assertEqual(0, parser.snp_mask[0, 0])
    self.assertEqual(0, parser.snp_mask[1, 1])
    self.assertEqual(0, parser.snp_mask[2, 1])
def testMap3File(self):
    """Load ``self.map3_filename`` with ``map3=True`` and check its markers.

    All seven loci must be present with the expected chromosomes, rsids
    and positions, and nothing may be flagged in the SNP mask.
    """
    parser = PedigreeParser(self.map3_filename, self.ped_filename)
    parser.load_mapfile(map3=True)

    self.assertEqual(7, len(parser.markers))
    self.assertEqual(7, len(parser.snp_mask))
    self.assertEqual(7, parser.locus_count)

    expected_chroms = [1, 1, 1, 1, 2, 2, 2]
    self.assertEqual(expected_chroms, list(parser.markers[:, 0]))
    self.assertEqual("rs0001", parser.rsids[0])
    self.assertEqual("rs0005", parser.rsids[4])
    self.assertEqual("rs0007", parser.rsids[6])

    expected_positions = [500, 10000, 25000, 45000, 750, 10000, 25000]
    self.assertEqual(expected_positions, list(parser.markers[:, 1]))

    # Masks are filters, so we should have 7 entries, but none will be 1
    self.assertEqual(0, numpy.sum(parser.snp_mask))
def testPedNegativePositions(self):
    """Parse the pedigree using ``self.map_miss_filename`` as its map.

    Loci from the parser are compared against the reference map
    (``self.map_filename``) starting at row 2; the comparison must end at
    row 7.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    reference = get_lines(self.map_filename, split=True)

    cursor = 2
    for snp in parser:
        row = reference[cursor]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[cursor], list(snp.genotype_data))
        cursor += 1
    self.assertEqual(7, cursor)
def testWithFewIndividuals(self):
    """Parse ``self.miniped_filename`` and check its three-value genotypes.

    Each of the seven loci must match the map file and the matching row
    of ``genotypes``.
    """
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.miniped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    genotypes = [
        [0, 1, 0],
        [1, 1, 0],
        [0, 2, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
    ]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedNegativePositions(self):
    """Parse the pedigree with ``self.map_miss_filename`` as the map.

    Parser output is compared against rows 2 through 6 of the reference
    map in ``self.map_filename``; the running index must finish at 7.
    """
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 2
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testMissingComplete(self):
    """Parse the pedigree with missing calls using default tolerances.

    Every one of the seven loci must match ``self.missing_genotypes`` and
    the map file on chromosome, position and rsid.
    """
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        index += 1
    self.assertEqual(7, index)
def testPedNegativePositionsLocalChrom(self):
    """Parse with ``self.map_miss_filename`` while limited to chromosome 1.

    With ``BoundaryCheck.chrom = 1`` the parser output is compared against
    the reference map starting at row 2; the running index must finish
    at 4.
    """
    BoundaryCheck.chrom = 1
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 2
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(4, index)
def testWithFewIndividuals(self):
    """Parse the mini pedigree and compare its three-value genotype rows.

    All seven loci must agree with the map file and with ``expected``.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.miniped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    expected = [
        [0, 1, 0],
        [1, 1, 0],
        [0, 2, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
    ]

    seen = 0
    for snp in parser:
        row = map_rows[seen]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(expected[seen], list(snp.genotype_data))
        seen += 1
    self.assertEqual(7, seen)
def testPedigreeIndExclusionsComplete(self):
    """Apply individual exclusions to the complete pedigree.

    With ``"11:11"`` and ``"12:12"`` excluded, each of the seven loci must
    yield the first ten values of the matching ``self.genotypes`` row.
    """
    DataParser.ind_exclusions = ["11:11", "12:12"]
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    seen = 0
    for snp in parser:
        row = map_rows[seen]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[seen][:10], list(snp.genotype_data))
        seen += 1
    self.assertEqual(7, seen)
def testPedBoundary(self):
    """Restrict parsing to chromosome 2 through a fresh ``BoundaryCheck``.

    Parser output is compared against the map file from row 4 onward; the
    running index must finish at 7.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    cursor = 4
    for snp in parser:
        row = map_rows[cursor]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[cursor], list(snp.genotype_data))
        cursor += 1
    self.assertEqual(7, cursor)
def testPedNegativePosLocalChromMissSNP(self):
    """Limit to chromosome 1 and additionally exclude SNP ``rs0004``.

    Uses ``self.map_miss_filename`` as the map. Parser output is compared
    against the reference map starting at row 2 and the running index must
    finish at 3.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    reference = get_lines(self.map_filename, split=True)

    cursor = 2
    for snp in parser:
        row = reference[cursor]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[cursor], list(snp.genotype_data))
        cursor += 1
    self.assertEqual(3, cursor)
def testPedNegativePosLocalChromMissSNP(self):
    """Limit to chromosome 1 and exclude SNP ``rs0004``.

    Uses ``self.map_miss_filename`` as the map. Parser output is compared
    against the reference map (``self.map_filename``) starting at row 2;
    the running index must finish at 3.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 2
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(3, index)
def testPedBoundary(self):
    """Restrict parsing to chromosome 2 via a fresh ``BoundaryCheck``.

    Parser output is compared against the map file from row 4 onward; the
    running index must finish at 7.
    """
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        pedigree = [line.split() for line in map_file]

    index = 4
    for snp in ped_parser:
        self.assertEqual(int(pedigree[index][0]), snp.chr)
        self.assertEqual(int(pedigree[index][3]), snp.pos)
        self.assertEqual(pedigree[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testMissingIndThresh(self):
    """Parse the pedigree with missing calls under ``ind_miss_tol = 0.5``.

    Each of the seven loci must equal the matching
    ``self.missing_genotypes`` row with its first entry removed.
    """
    pc = PhenoCovar()
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    map_idx = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[map_idx][0]), snp.chr)
        self.assertEqual(int(mapdata[map_idx][3]), snp.pos)
        self.assertEqual(mapdata[map_idx][1], snp.rsid)
        self.assertEqual(self.missing_genotypes[index][1:], list(snp.genotype_data))
        index += 1
        map_idx += 1
    self.assertEqual(7, index)
def testMissingSnpThresh(self):
    """Parse the pedigree with missing calls under ``snp_miss_tol = 0.5``.

    The locus at index 1 is expected to be dropped, so the comparison
    skips that row of the map file and of ``self.missing_genotypes``.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        if index == 1:
            # This gets dropped due to missingness
            index += 1
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedMultiPheno(self):
    """Load two phenotypes from ``self.pheno_file`` and check both per locus.

    With ``PhenoCovar.sex_as_covariate`` enabled and individual 6 masked
    out, each phenotype must return the sex covariate in ``sex`` and the
    values in the matching ``dual_pheno`` row for the remaining eleven
    individuals.
    """
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)
    with open(self.pheno_file) as f:
        pc.load_phenofile(f, indices=[2, 3])
    mapdata = get_lines(self.map_filename, split=True)

    # Expected values once the individual at position 6 has been removed.
    # The never-read ``pheno_data`` table was dropped.
    sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
    dual_pheno = [
        [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
        [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
    ]
    self.assertEqual(2, len(pc.phenotype_data))

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))

        idx = 0
        for y in pc:
            non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
            non_missing[6] = False
            (pheno, covariates, nm_indata) = y.get_variables(numpy.invert(non_missing))
            for i in range(0, 11):
                self.assertEqual(sex[i], covariates[0][i])
                self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])
            idx += 1
        self.assertEqual(2, idx)
        index += 1
    self.assertEqual(7, index)
def testMissingSnpThresh(self):
    """Check SNP-level missingness filtering at ``snp_miss_tol = 0.5``.

    Row 1 of the map file and of ``self.missing_genotypes`` is skipped
    during comparison because that locus is expected to be dropped.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # The map file is read inside ``with`` so its handle is closed as soon
    # as the rows have been collected.
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    for snp in ped_parser:
        if index == 1:
            # This gets dropped due to missingness
            index += 1
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedMultiPheno(self):
    """Load two phenotypes from ``self.pheno_file`` and check both per locus.

    With ``PhenoCovar.sex_as_covariate`` enabled and individual 6 masked
    out, each phenotype must return the sex covariate in ``sex`` and the
    values in the matching ``dual_pheno`` row for the remaining eleven
    individuals.
    """
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Both input files are opened through context managers so their
    # handles are closed instead of leaked.
    with open(self.pheno_file) as pheno_file:
        pc.load_phenofile(pheno_file, indices=[2, 3])
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    # Expected values once the individual at position 6 has been removed.
    # The never-read ``pheno_data`` table was dropped.
    sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
    dual_pheno = [
        [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
        [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
    ]
    self.assertEqual(2, len(pc.phenotype_data))

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))

        idx = 0
        for y in pc:
            non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
            non_missing[6] = False
            (pheno, covariates, nm_indata) = y.get_variables(numpy.invert(non_missing))
            for i in range(0, 11):
                self.assertEqual(sex[i], covariates[0][i])
                self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])
            idx += 1
        self.assertEqual(2, idx)
        index += 1
    self.assertEqual(7, index)
def testPedCompleteAlternateIteration(self):
    """Useful if you need to iterate over these in a more controlled manner

    Obtains the first locus from the parser's iterator and then advances
    that same object by hand until ``StopIteration``, checking each locus
    against the map file and ``self.genotypes``.
    """
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)

    # Close the map file deterministically instead of leaking the handle
    # returned by a bare open().
    with open(self.map_filename) as map_file:
        mapdata = [line.strip().split() for line in map_file]

    index = 0
    # The builtin next() works on both Python 2.6+ and Python 3, whereas
    # calling the ``.next()`` method directly only exists on Python 2
    # style iterators.
    snp = next(iter(ped_parser))
    try:
        while True:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
            next(snp)
    except StopIteration:
        pass
    self.assertEqual(7, index)
def testPedCompleteAlternateIteration(self):
    """Useful if you need to iterate over these in a more controlled manner

    Takes the first locus from the parser's iterator and advances that
    same object manually until it is exhausted; seven loci must have been
    verified by then.
    """
    pheno_covar = PhenoCovar()
    parser = PedigreeParser(self.map_filename, self.ped_filename)
    parser.load_mapfile()
    parser.load_genotypes(pheno_covar)
    map_rows = get_lines(self.map_filename, split=True)

    checked = 0
    locus = next(parser.__iter__())
    try:
        while True:
            row = map_rows[checked]
            self.assertEqual(int(row[0]), locus.chr)
            self.assertEqual(int(row[3]), locus.pos)
            self.assertEqual(row[1], locus.rsid)
            self.assertEqual(self.genotypes[checked], list(locus.genotype_data))
            checked += 1
            # Advancing raises StopIteration once every locus has been seen.
            next(locus)
    except StopIteration:
        pass
    self.assertEqual(7, checked)
def testForInvariant(self):
    """Expect ``InvariantVar`` when the chosen phenotype column is constant.

    A tab-separated phenotype file is written whose ``MSA`` column is 1.0
    for every individual. After loading it with ``indices=[3]``, iterating
    the loci and requesting the variables must raise ``InvariantVar``.
    """
    prefix = "__test_pedigree"
    self.pheno_file = "%s_mch.txt" % (prefix)
    # One record per line; fields are tab-separated.
    pheno_rows = (
        "FID\tIID\tBMI\tIBM\tMSA",
        "1\t1\t0.1\t1.0\t1.0",
        "2\t2\t0.2\t0.5\t1.0",
        "3\t3\t0.3\t0.6\t1.0",
        "4\t4\t0.4\t0.5\t1.0",
        "5\t5\t0.5\t1.0\t1.0",
        "6\t6\t0.6\t0.1\t1.0",
        "17\t7\t0.1\t1.0\t1.0",
        "8\t8\t0.2\t0.5\t1.0",
        "9\t9\t0.3\t0.6\t1.0",
        "10\t10\t0.4\t0.5\t1.0",
        "11\t11\t0.5\t1.0\t1.0",
        "12\t12\t0.6\t0.1\t1.0",
    )
    # Context managers replace the manual open()/close() pair and the bare
    # open() handed to load_phenofile, so neither handle can leak.
    with open(self.pheno_file, "w") as out:
        out.write("\n".join(pheno_rows))

    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
    ped_parser.load_mapfile()
    ped_parser.load_genotypes(pc)
    with open(self.pheno_file) as pheno_file:
        pc.load_phenofile(pheno_file, indices=[3])

    # The never-read ``index`` and ``mapdata`` locals were dropped; only
    # the raised exception matters here.
    with self.assertRaises(InvariantVar):
        for snp in ped_parser:
            for y in pc:
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                y.get_variables(numpy.invert(non_missing))