def testPedSnpBoundary2TPed(self):
    """Check parsing with a SnpBoundaryCheck for "rs0005-rs0006" and chrom 2.

    Two loci are expected. Both the locus list and the SNPs yielded by
    iterating the parser are compared against TPED rows 4 and 5.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed instead of leaving it to the garbage collector.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    first_row = 4  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[index], snp.hetero_freq, places=4)
        index += 1
    self.assertEqual(2, ped_parser.locus_count)

    index = first_row
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Two SNPs iterated, starting at row 4
    self.assertEqual(6, index)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Check a "rs0005-rs0007" SNP boundary with "rs0007" loaded as an exclusion.

    BoundaryCheck.chrom is set to 2. The loci and the iterated SNPs are
    compared against the TPED file starting at row 4, and exactly two SNPs
    are expected from iteration.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed instead of leaving it to the garbage collector.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    first_row = 4  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        index += 1

    index = first_row
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Two SNPs iterated, starting at row 4
    self.assertEqual(6, index)
def testPedSnpBoundary2TPed(self):
    """Check parsing with a SnpBoundaryCheck for "rs0005-rs0006" and chrom 2.

    Two loci are expected. Both the locus list and the SNPs yielded by
    iterating the parser are compared against TPED rows 4 and 5.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed instead of leaving it to the garbage collector.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    first_row = 4  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[index], snp.hetero_freq, places=4)
        index += 1
    self.assertEqual(2, ped_parser.locus_count)

    index = first_row
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Two SNPs iterated, starting at row 4
    self.assertEqual(6, index)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Check a "rs0005-rs0007" SNP boundary with "rs0007" loaded as an exclusion.

    BoundaryCheck.chrom is set to 2. The loci and the iterated SNPs are
    compared against the TPED file starting at row 4, and exactly two SNPs
    are expected from iteration.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed instead of leaving it to the garbage collector.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    first_row = 4  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        index += 1

    index = first_row
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Two SNPs iterated, starting at row 4
    self.assertEqual(6, index)
def testPedWithMissingMxIndComplete(self):
    """Check parsing of the missing-data TPED with DataParser.ind_miss_tol = 0.5.

    All seven loci are expected, and each iterated SNP must match the
    corresponding row of the TPED file and the expected genotype vector
    (11 values per SNP, -1 marking a missing call).
    """
    pc = PhenoCovar()
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed. str.split() with no arguments already discards surrounding
    # whitespace, so no separate strip() is needed.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    self.assertEqual(7, ped_parser.locus_count)

    index = 0
    for snp in ped_parser.get_loci():
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        index += 1

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedSnpBoundaryTPed(self):
    """Check parsing with a SnpBoundaryCheck for "rs0001-rs0003" and chrom 1.

    Three loci are expected. The locus list and the iterated SNPs are both
    compared against the first three rows of the TPED file.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
    BoundaryCheck.chrom = 1
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    # First pass: locus metadata and heterozygote frequencies
    row_no = 0
    for locus in parser.get_loci():
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        self.assertAlmostEqual(
            self.hetero_freq_tped[row_no], locus.hetero_freq, places=4
        )
        row_no += 1
    self.assertEqual(3, parser.locus_count)

    # Second pass: iterate the parser itself and compare genotype vectors
    row_no = 0
    for locus in parser:
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        self.assertEqual(fields[1], locus.rsid)
        self.assertEqual(self.genotypes[row_no], list(locus.genotype_data))
        row_no += 1
    self.assertEqual(3, row_no)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Check a "rs0005-rs0007" SNP boundary with "rs0007" loaded as an exclusion.

    BoundaryCheck.chrom is set to 2. Genotypes are fetched through
    get_genotype_data() with an all-True filter. SNPs for which that call
    raises TooMuchMissing or InvalidFrequency are counted but not compared.
    Two SNPs are expected from iteration, starting at TPED row 4.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_no = 4
    for locus in parser.get_loci():
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        row_no += 1

    row_no = 4
    for locus in parser:
        # Boolean mask selecting every individual
        keep_all = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(keep_all)
            fields = tped_rows[row_no]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row_no], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            # Rejected SNPs still advance the row counter below
            pass
        row_no += 1
    self.assertEqual(6, row_no)
def testPedWithMissingMxSnpComplete(self):
    """Check the missing-data TPED with DataParser.snp_miss_tol = 0.5.

    Seven loci are expected. For every SNP and every phenotype yielded by
    the PhenoCovar object, genotype data is requested for the non-missing
    individuals. Exactly one request is expected to raise TooMuchMissing
    and six are expected to succeed.
    """
    pheno_covar = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

    expected_genotypes = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    expected_hetero_freq = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

    self.assertEqual(7, parser.locus_count)

    row_no = 0
    for locus in parser.get_loci():
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        row_no += 1
    self.assertEqual(7, row_no)

    row_no = 0
    missing = 0
    valid = 0
    for locus in parser:
        for phenotype in pheno_covar:
            # Only the non-missing mask (third element) is used here
            _pheno, _covars, nonmissing = phenotype.get_variables(
                locus.missing_genotypes
            )
            try:
                genodata = locus.get_genotype_data(nonmissing)
                fields = tped_rows[row_no]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(
                    expected_genotypes[row_no], list(locus.genotype_data)
                )
                self.assertAlmostEqual(
                    expected_hetero_freq[row_no], genodata.hetero_freq, places=4
                )
                valid += 1
            except TooMuchMissing:
                missing += 1
            except InvalidFrequency:
                pass
            # NOTE(review): the counter advances once per (SNP, phenotype)
            # pair; it stays aligned with the TPED rows only while the
            # PhenoCovar object yields a single phenotype -- confirm.
            row_no += 1
    self.assertEqual(1, missing)
    self.assertEqual(6, valid)
    self.assertEqual(7, row_no)
def testAllelesInLoci(self):
    """Check the major/minor alleles reported for each locus.

    Each entry of self.tped1_alleles is read as (major, minor) and compared
    with the matching locus. Seven loci are expected in total.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    seen = 0
    for locus in parser.get_loci():
        expected = self.tped1_alleles[seen]
        self.assertEqual(expected[1], locus.minor_allele)
        self.assertEqual(expected[0], locus.major_allele)
        seen += 1
    self.assertEqual(7, seen)
def testPedWithMissingMxSnpComplete(self):
    """Check the missing-data TPED with DataParser.snp_miss_tol = 0.5.

    Seven loci are expected. Genotypes are fetched through
    get_genotype_data() with an all-True filter. SNPs for which that call
    raises TooMuchMissing or InvalidFrequency are counted but not compared.
    """
    pheno_covar = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.miss_tped_filename, split=True)

    expected_genotypes = [
        [0, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    expected_hetero_freq = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

    self.assertEqual(7, parser.locus_count)

    row_no = 0
    for locus in parser.get_loci():
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        row_no += 1

    row_no = 0
    for locus in parser:
        # Boolean mask selecting every individual
        keep_all = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(keep_all)
            fields = tped_rows[row_no]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertAlmostEqual(
                expected_hetero_freq[row_no], genodata.hetero_freq, places=4
            )
            self.assertEqual(expected_genotypes[row_no], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            # Rejected SNPs still advance the row counter below
            pass
        row_no += 1
    self.assertEqual(7, row_no)
def testPedWithMissingMxSnpComplete(self):
    """Check the missing-data TPED with DataParser.snp_miss_tol = 0.5.

    Six loci are expected, and comparisons start at TPED row 1 (row 0 is
    the one the test expects to be dropped). Locus metadata, heterozygote
    frequencies and genotype vectors are compared for rows 1 through 6.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed. str.split() with no arguments already discards surrounding
    # whitespace, so no separate strip() is needed.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    hetero_freq_tped = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

    self.assertEqual(6, ped_parser.locus_count)

    first_row = 1  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertAlmostEqual(hetero_freq_tped[index], snp.hetero_freq, places=4)
        index += 1

    index = first_row
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    # Six SNPs iterated, starting at row 1
    self.assertEqual(7, index)
def testPedWithMissingMxSnpComplete(self):
    """Check the missing-data TPED with DataParser.snp_miss_tol = 0.5.

    Six loci are expected, and comparisons start at TPED row 1 (row 0 is
    the one the test expects to be dropped). Locus metadata, heterozygote
    frequencies and genotype vectors are compared for rows 1 through 6.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed. str.split() with no arguments already discards surrounding
    # whitespace, so no separate strip() is needed.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    hetero_freq_tped = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

    self.assertEqual(6, ped_parser.locus_count)

    first_row = 1  # TPED row the first surviving SNP is compared against

    index = first_row
    for snp in ped_parser.get_loci():
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertAlmostEqual(hetero_freq_tped[index], snp.hetero_freq, places=4)
        index += 1

    index = first_row
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    # Six SNPs iterated, starting at row 1
    self.assertEqual(7, index)
def testPedBoundaryTPed(self):
    """Check parsing with a default BoundaryCheck and BoundaryCheck.chrom = 2.

    Three loci are expected, compared against TPED rows 4 through 6.
    Genotypes are fetched through get_genotype_data() with an all-True
    filter. SNPs for which that call raises TooMuchMissing or
    InvalidFrequency are counted but not compared.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = BoundaryCheck()
    BoundaryCheck.chrom = 2
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_no = 4
    for locus in parser.get_loci():
        fields = tped_rows[row_no]
        self.assertEqual(int(fields[0]), locus.chr)
        self.assertEqual(int(fields[3]), locus.pos)
        row_no += 1
    self.assertEqual(3, parser.locus_count)

    row_no = 4
    for locus in parser:
        # Boolean mask selecting every individual
        keep_all = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(keep_all)
            fields = tped_rows[row_no]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertAlmostEqual(
                self.hetero_freq_tped[row_no], genodata.hetero_freq, places=4
            )
            self.assertEqual(self.genotypes[row_no], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            # Rejected SNPs still advance the row counter below
            pass
        row_no += 1
    self.assertEqual(7, row_no)
def testPedWithMissingMxIndComplete(self):
    """Check parsing of the missing-data TPED with DataParser.ind_miss_tol = 0.5.

    All seven loci are expected, and each iterated SNP must match the
    corresponding row of the TPED file and the expected genotype vector
    (11 values per SNP, -1 marking a missing call).
    """
    pc = PhenoCovar()
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read the raw TPED rows; the context manager guarantees the handle is
    # closed. str.split() with no arguments already discards surrounding
    # whitespace, so no separate strip() is needed.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    self.assertEqual(7, ped_parser.locus_count)

    index = 0
    for snp in ped_parser.get_loci():
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        index += 1

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)