def testPedSnpBoundary2TPed(self):
        """Check TPED parsing when limited to SNPs rs0005-rs0006 on chromosome 2.

        Expected chromosome, position and rsid values are read straight from
        the TPED file (rows 4 and 5); expected heterozygote frequencies and
        genotypes come from the ``self.hetero_freq_tped`` and
        ``self.genotypes`` fixtures.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        # Boundary settings are assigned on the classes themselves, before
        # loading (presumably consulted by load_genotypes -- confirm).
        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read the reference rows inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        # Row 4 of the TPED file is compared against the first retained locus.
        index = 4
        for snp in ped_parser.get_loci():
            row = pedigree[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index],
                                   snp.hetero_freq,
                                   places=4)
            index += 1
        self.assertEqual(2, ped_parser.locus_count)

        index = 4
        for snp in ped_parser:
            row = pedigree[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        # Started at 4 and exactly two SNPs must have been iterated.
        self.assertEqual(6, index)
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Check TPED parsing for SNP range rs0005-rs0007 with rs0007 excluded.

        Chromosome 2 is selected. The loci that remain are compared with the
        TPED rows starting at index 4, and iterating the parser must yield
        exactly two SNPs (rows 4 and 5).
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Use a context manager so the reference file is closed promptly
        # rather than left open until garbage collection.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        index = 4
        for snp in ped_parser.get_loci():
            row = pedigree[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            index += 1

        index = 4
        for snp in ped_parser:
            row = pedigree[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        # Two SNPs iterated, starting from row 4.
        self.assertEqual(6, index)
    def testPedSnpBoundary2TPed(self):
        """Verify loci and genotypes for the rs0005-rs0006 window on chromosome 2.

        The TPED file itself supplies the expected chromosome, position and
        rsid (rows 4 and 5); ``self.hetero_freq_tped`` and ``self.genotypes``
        supply the expected frequencies and genotype vectors.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # The original left this file open; the with-block guarantees closure.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        index = 4
        for snp in ped_parser.get_loci():
            expected = pedigree[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index], snp.hetero_freq, places=4)
            index += 1
        self.assertEqual(2, ped_parser.locus_count)

        index = 4
        for snp in ped_parser:
            expected = pedigree[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertEqual(expected[1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        # index began at 4, so 6 means exactly two SNPs were yielded.
        self.assertEqual(6, index)
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Verify the rs0005-rs0007 window on chromosome 2 minus excluded rs0007.

        Remaining loci are matched against TPED rows from index 4 onward, and
        the parser iteration is expected to produce two SNPs.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Close the reference file as soon as its rows are read (the bare
        # open(...).readlines() form leaked the handle).
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        index = 4
        for snp in ped_parser.get_loci():
            expected = pedigree[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            index += 1

        index = 4
        for snp in ped_parser:
            expected = pedigree[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertEqual(expected[1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(6, index)
    def testPedWithMissingMxIndComplete(self):
        """Check parsing of the missing-data TPED file with ind_miss_tol = 0.5.

        All seven loci are expected to be retained, and each expected genotype
        vector below holds eleven values (-1 appears where the expected value
        is the missing marker).
        """
        pc = PhenoCovar()
        DataParser.ind_miss_tol = 0.5       # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read the reference rows with a context manager so the handle is
        # closed instead of leaked; split() already ignores surrounding
        # whitespace, so no separate strip() is needed.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.split() for line in tped_file]

        genotypes_w_missing = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
            [1,  0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0,  1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0,  2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1,  0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1,  1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0,  1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        self.assertEqual(7, ped_parser.locus_count)

        index = 0
        for snp in ped_parser.get_loci():
            row = mapdata[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            index += 1

        index = 0
        for snp in ped_parser:
            row = mapdata[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
    def testPedSnpBoundaryTPed(self):
        """Verify loci and genotypes for the rs0001-rs0003 window on chromosome 1.

        Three loci are expected; they are compared with the first three rows
        of the TPED file and with the ``self.hetero_freq_tped`` and
        ``self.genotypes`` fixtures.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
        BoundaryCheck.chrom = 1
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        # Pass 1: locus metadata and heterozygote frequencies.
        row_idx = 0
        for locus in parser.get_loci():
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertAlmostEqual(
                self.hetero_freq_tped[row_idx], locus.hetero_freq, places=4
            )
            row_idx += 1
        self.assertEqual(3, parser.locus_count)

        # Pass 2: iterate the parser itself and compare genotype vectors.
        row_idx = 0
        for locus in parser:
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx],
                             list(locus.genotype_data))
            row_idx += 1
        self.assertEqual(3, row_idx)
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Verify the rs0005-rs0007 window on chromosome 2 with rs0007 excluded.

        Genotypes are fetched through ``get_genotype_data`` with an all-True
        filter; SNPs that raise ``TooMuchMissing`` or ``InvalidFrequency`` are
        skipped but still counted. Two SNPs are expected in total.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        snp_boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary = snp_boundary
        snp_boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.tped_filename, split=True)

        row_idx = 4
        for locus in parser.get_loci():
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            row_idx += 1

        row_idx = 4
        for locus in parser:
            # Boolean mask selecting every entry of missing_genotypes.
            entry_count = locus.missing_genotypes.shape[0]
            keep_all = numpy.ones(entry_count) == 1
            try:
                genodata = locus.get_genotype_data(keep_all)
                expected = tped_rows[row_idx]
                self.assertEqual(int(expected[0]), locus.chr)
                self.assertEqual(int(expected[3]), locus.pos)
                self.assertEqual(expected[1], locus.rsid)
                self.assertEqual(self.genotypes[row_idx],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # Rejected SNPs are tolerated here; they still advance the row.
                pass
            row_idx += 1
        self.assertEqual(6, row_idx)
    def testPedWithMissingMxSnpComplete(self):
        """Check the missing-data TPED file with snp_miss_tol = 0.5.

        All seven loci are listed by ``get_loci``. When genotype data is
        requested per phenotype, exactly one request is expected to raise
        ``TooMuchMissing`` and six to succeed.
        """
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        expected_hetero = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(7, parser.locus_count)

        row_idx = 0
        for locus in parser.get_loci():
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            row_idx += 1
        self.assertEqual(7, row_idx)

        row_idx = 0
        too_much_missing = 0
        passed = 0
        for locus in parser:
            for phenotype in pheno_covar:
                # Only the non-missing mask is needed from get_variables.
                _pheno, _covars, nonmissing = phenotype.get_variables(
                    locus.missing_genotypes
                )
                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    expected = tped_rows[row_idx]
                    self.assertEqual(int(expected[0]), locus.chr)
                    self.assertEqual(int(expected[3]), locus.pos)
                    self.assertEqual(expected[1], locus.rsid)
                    self.assertEqual(expected_genotypes[row_idx],
                                     list(locus.genotype_data))
                    self.assertAlmostEqual(expected_hetero[row_idx],
                                           genodata.hetero_freq,
                                           places=4)
                    passed += 1
                except TooMuchMissing:
                    too_much_missing += 1
                except InvalidFrequency:
                    pass
            row_idx += 1
        self.assertEqual(1, too_much_missing)
        self.assertEqual(6, passed)
        self.assertEqual(7, row_idx)
# Example #9
# 0
    def testAllelesInLoci(self):
        """Check the minor and major allele reported for each of the seven loci.

        Each ``self.tped1_alleles`` entry is read as (major, minor): element 0
        is compared with ``major_allele`` and element 1 with ``minor_allele``.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        checked = 0
        for locus in parser.get_loci():
            alleles = self.tped1_alleles[checked]
            self.assertEqual(alleles[1], locus.minor_allele)
            self.assertEqual(alleles[0], locus.major_allele)
            checked += 1
        self.assertEqual(7, checked)
    def testPedWithMissingMxSnpComplete(self):
        """Check the missing-data TPED file with snp_miss_tol = 0.5.

        Seven loci are expected. Genotypes are fetched with an all-True
        filter, and SNPs that raise ``TooMuchMissing`` or ``InvalidFrequency``
        are skipped while still advancing the row counter.
        """
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.miss_tped_filename, split=True)

        # NOTE(review): row lengths are irregular (2, 12, then 11 values);
        # kept exactly as in the original fixture -- confirm intent.
        expected_genotypes = [
            [0, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        expected_hetero = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(7, parser.locus_count)

        row_idx = 0
        for locus in parser.get_loci():
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            row_idx += 1

        row_idx = 0
        for locus in parser:
            entry_count = locus.missing_genotypes.shape[0]
            keep_all = numpy.ones(entry_count) == 1
            try:
                genodata = locus.get_genotype_data(keep_all)
                expected = tped_rows[row_idx]
                self.assertEqual(int(expected[0]), locus.chr)
                self.assertEqual(int(expected[3]), locus.pos)
                self.assertEqual(expected[1], locus.rsid)
                self.assertAlmostEqual(expected_hetero[row_idx],
                                       genodata.hetero_freq,
                                       places=4)
                self.assertEqual(expected_genotypes[row_idx],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                pass
            row_idx += 1
        self.assertEqual(7, row_idx)
    def testPedWithMissingMxSnpComplete(self):
        """Check the missing-data TPED file with snp_miss_tol = 0.5.

        Six loci are expected to remain. Comparisons start at row 1 of the
        TPED file, so row 0 is never matched against a locus.
        """
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read inside a with-block so the file handle is closed rather than
        # leaked; split() alone already discards surrounding whitespace.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.split() for line in tped_file]

        genotypes_w_missing = [[0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
                               [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
                               [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
                               [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
                               [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
                               [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                               [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0]]

        hetero_freq_tped = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(6, ped_parser.locus_count)

        index = 1
        for snp in ped_parser.get_loci():
            row = mapdata[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertAlmostEqual(hetero_freq_tped[index],
                                   snp.hetero_freq,
                                   places=4)
            index += 1

        index = 1
        for snp in ped_parser:
            row = mapdata[index]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index],
                             list(snp.genotype_data))
            index += 1
        # Started at 1, so 7 means six SNPs were iterated.
        self.assertEqual(7, index)
    def testPedWithMissingMxSnpComplete(self):
        """Verify SNP-level missingness filtering at snp_miss_tol = 0.5.

        ``locus_count`` must be six, and the retained loci are matched against
        rows 1 through 6 of the missing-data TPED file.
        """
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5       # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # The context manager closes the file once the rows are collected;
        # the original open(...).readlines() never closed it.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.split() for line in tped_file]

        genotypes_w_missing = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1,  1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        hetero_freq_tped = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

        self.assertEqual(6, ped_parser.locus_count)

        index = 1
        for snp in ped_parser.get_loci():
            expected = mapdata[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertAlmostEqual(hetero_freq_tped[index], snp.hetero_freq, places=4)
            index += 1

        index = 1
        for snp in ped_parser:
            expected = mapdata[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertEqual(expected[1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
    def testPedBoundaryTPed(self):
        """Verify parsing with a default BoundaryCheck limited to chromosome 2.

        Three loci are expected and are compared with TPED rows 4 through 6.
        Genotypes are fetched with an all-True filter; SNPs that raise
        ``TooMuchMissing`` or ``InvalidFrequency`` are skipped but counted.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.tped_filename, split=True)

        row_idx = 4
        for locus in parser.get_loci():
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            row_idx += 1
        self.assertEqual(3, parser.locus_count)

        row_idx = 4
        for locus in parser:
            entry_count = locus.missing_genotypes.shape[0]
            keep_all = numpy.ones(entry_count) == 1
            try:
                genodata = locus.get_genotype_data(keep_all)
                expected = tped_rows[row_idx]
                self.assertEqual(int(expected[0]), locus.chr)
                self.assertEqual(int(expected[3]), locus.pos)
                self.assertEqual(expected[1], locus.rsid)
                self.assertAlmostEqual(self.hetero_freq_tped[row_idx],
                                       genodata.hetero_freq,
                                       places=4)
                self.assertEqual(self.genotypes[row_idx],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                pass
            row_idx += 1
        self.assertEqual(7, row_idx)
    def testPedWithMissingMxIndComplete(self):
        """Verify individual-level missingness filtering at ind_miss_tol = 0.5.

        All seven loci must be retained, and each expected genotype vector
        below has eleven entries.
        """
        pc = PhenoCovar()
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Collect the reference rows inside a with-block so the file is
        # closed; the bare open(...).readlines() form leaked the handle.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.split() for line in tped_file]

        genotypes_w_missing = [[0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
                               [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
                               [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
                               [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
                               [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
                               [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                               [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0]]
        self.assertEqual(7, ped_parser.locus_count)

        index = 0
        for snp in ped_parser.get_loci():
            expected = mapdata[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            index += 1

        index = 0
        for snp in ped_parser:
            expected = mapdata[index]
            self.assertEqual(int(expected[0]), snp.chr)
            self.assertEqual(int(expected[3]), snp.pos)
            self.assertEqual(expected[1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index],
                             list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)