Esempio n. 1
0
    def testEmptyPhenoCovar(self):
        """Iterate the ``self.missing`` dataset against a freshly built PhenoCovar.

        The final assertions require that all 7 (locus, phenotype) iterations
        raise ``TooMuchMissingpPhenoCovar`` with ``pct`` of about 1.0, so the
        genotype/map comparisons inside the ``try`` only run if
        ``get_variables`` unexpectedly succeeds.
        """
        # A single PhenoCovar is enough; the original built one and threw it away.
        pc = PhenoCovar()
        parser = Parser(self.missing, data_field='GT')
        parser.init_subjects(pc)
        parser.load_genotypes()

        index = 0
        missing_count = 0
        self.assertEqual(7, parser.locus_count)

        for snp in parser:
            for y in pc:
                try:
                    # Only the non-missing mask is needed below.
                    _, _, non_missing = y.get_variables(snp.missing_genotypes)
                    genodata = snp.get_genotype_data(non_missing)
                    self.assertEqual(int(self.nonmissing_mapdata[index][0]),
                                     snp.chr)
                    self.assertEqual(int(self.nonmissing_mapdata[index][1]),
                                     snp.pos)
                    self.assertEqual(self.nonmissing_mapdata[index][2],
                                     snp.rsid)
                    self.assertEqual(self.genotypes[index],
                                     list(genodata.genotypes))
                except TooMuchMissingpPhenoCovar as e:
                    missing_count += 1
                    self.assertAlmostEqual(1.0, e.pct, places=4)
                except InvalidFrequency:
                    # Deliberately tolerated: the iteration still counts below.
                    pass
                index += 1
        self.assertEqual(7, index)
        self.assertEqual(7, missing_count)
Esempio n. 2
0
    def testPedWithMissingMxIndExclusionsToo(self):
        """Parse ``self.missing`` with explicit exclusions plus a missingness tolerance.

        Sets ``DataParser.ind_exclusions`` and ``DataParser.ind_miss_tol`` on
        the class, then checks map data and genotypes for every locus that
        does not raise ``TooMuchMissing`` or ``InvalidFrequency``. Exactly 7
        loci must be iterated either way.

        NOTE(review): the class-level settings are not restored here;
        presumably tearDown resets them -- confirm.
        """
        DataParser.ind_exclusions = ["3", "4"]
        DataParser.ind_miss_tol = 0.5  # We should lose "2"

        # A single PhenoCovar is enough; the original built one and threw it away.
        pc = PhenoCovar()
        parser = Parser(self.missing, data_field='GT')
        parser.init_subjects(pc)
        parser.load_genotypes()

        genotypes_w_missing = [[0, 1], [1, 0, 1, 1, 1, 0, 0, 0, 1],
                               [0, 0, 0, 0, 2, 1, 1, 0, 0],
                               [0, 1, 0, 0, 1, 2, 1, 1, 0],
                               [1, 0, 0, 1, 2, 0, 1, 0, 0],
                               [1, 0, 0, 0, 0, 1, 0, 0, 0],
                               [0, 0, 0, 0, 1, 1, 0, 0, 0]]
        index = 0
        for snp in parser:
            # All-True mask with one entry per element of missing_genotypes.
            snp_filter = numpy.ones(snp.missing_genotypes.shape[0]) == 1
            try:
                genodata = snp.get_genotype_data(snp_filter)
                self.assertEqual(int(self.nonmissing_mapdata[index][0]),
                                 snp.chr)
                self.assertEqual(int(self.nonmissing_mapdata[index][1]),
                                 snp.pos)
                self.assertEqual(self.nonmissing_mapdata[index][2], snp.rsid)
                self.assertEqual(genotypes_w_missing[index],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # Loci rejected for these reasons are skipped but still counted.
                pass

            index += 1
        self.assertEqual(7, index)
Esempio n. 3
0
    def test_covar_header(self):
        """Load the ``self.header`` covariate file with sex disabled, then enabled.

        With ``PhenoCovar.sex_as_covariate`` False the only covariate label is
        "BMI"; with it True, "SEX" comes first and "BMI" second. Both passes
        expect 6 values per covariate.
        """
        covariate_values = [0.9, 1.0, 0.4, 0.8, 1, 0.1]

        # First pass: sex is NOT used as a covariate.
        PhenoCovar.sex_as_covariate = False
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)

        pc.load_covarfile(self.header)

        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(1, len(pc.covariate_labels))
        self.assertEqual("BMI", pc.covariate_labels[0])

        for idx, expected in enumerate(covariate_values):
            self.assertAlmostEqual(expected, pc.covariate_data[0][idx])

        # Second pass: sex IS used as a covariate and is expected first.
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)

        pc.load_covarfile(self.header)

        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(2, len(pc.covariate_labels))
        self.assertEqual("SEX", pc.covariate_labels[0])
        self.assertEqual("BMI", pc.covariate_labels[1])

        sex = [1, 1, 2, 2, 1, 1]

        for idx, expected in enumerate(covariate_values):
            self.assertEqual(sex[idx], pc.covariate_data[0][idx])
            self.assertAlmostEqual(expected, pc.covariate_data[1][idx])
Esempio n. 4
0
    def testCovarNoSex(self):
        """Load selected covariate columns by index, without and then with sex.

        Three scenarios are checked against ``self.phenotypes``:
        indices [2, 3] without sex, indices [1, 2, 3] without sex, and
        indices [2, 3] with sex as the leading covariate (checked against
        ``self.sex``).

        Every file is opened in a ``with`` block so its handle is closed as
        soon as the data has been loaded.
        """
        with open(self.filenames[2]) as ped_file:
            self.ped = [line.strip() for line in ped_file]

        # Scenario 1: sex is NOT a covariate; load columns 2 and 3.
        PhenoCovar.sex_as_covariate = False

        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        with open(self.filenames[4]) as covar_file:
            pc.load_covarfile(covar_file, indices=[2, 3])
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(2, len(pc.covariate_data))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(6, len(pc.covariate_data[1]))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(["Pheno-1"], pc.phenotype_names)
        self.assertEqual(["IBM", "MSA"], pc.covariate_labels)

        for index, (_, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(p2, pc.covariate_data[0][index])
            self.assertEqual(p3, pc.covariate_data[1][index])

        # Scenario 2: still no sex; load columns 1, 2 and 3.
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        with open(self.filenames[4]) as covar_file:
            pc.load_covarfile(covar_file, indices=[1, 2, 3])
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(3, len(pc.covariate_data))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(6, len(pc.covariate_data[1]))
        self.assertEqual(6, len(pc.covariate_data[2]))
        self.assertEqual(["BMI", "IBM", "MSA"], pc.covariate_labels)

        for index, (p1, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(p1, pc.covariate_data[0][index])
            self.assertEqual(p2, pc.covariate_data[1][index])
            self.assertEqual(p3, pc.covariate_data[2][index])

        # Scenario 3: sex IS a covariate and is expected ahead of columns 2 and 3.
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        with open(self.filenames[4]) as covar_file:
            pc.load_covarfile(covar_file, indices=[2, 3])
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(3, len(pc.covariate_data))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(6, len(pc.covariate_data[1]))
        self.assertEqual(6, len(pc.covariate_data[2]))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(["Pheno-1"], pc.phenotype_names)
        self.assertEqual(["SEX", "IBM", "MSA"], pc.covariate_labels)

        for index, (_, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(self.sex[index], pc.covariate_data[0][index])
            self.assertEqual(p2, pc.covariate_data[1][index])
            self.assertEqual(p3, pc.covariate_data[2][index])
Esempio n. 5
0
    def test_basic_population(self):
        """Populate PhenoCovar via ``add_subject`` with sex enabled, then disabled.

        Each line of ``self.ped`` is split into family id, individual id, sex
        and phenotype. With ``sex_as_covariate`` True, sex must appear as the
        single covariate; with it False, no covariates may be stored while
        pedigree and phenotype data are still recorded.
        """
        # Pass 1: sex is used as a covariate.
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()

        for line in self.ped:
            fam, ind, sex, ph = line.split()
            pc.add_subject("%s:%s" % (fam, ind),
                           sex=int(sex),
                           phenotype=float(ph))

        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[0]))

        for i, line in enumerate(self.ped):
            fam, ind, sex, ph = line.split()
            iid = "%s:%s" % (fam, ind)
            # pedigree_data maps the subject id to its insertion index.
            self.assertEqual(pc.pedigree_data[iid], i)
            self.assertAlmostEqual(float(ph), pc.phenotype_data[0][i])
            self.assertEqual(int(sex), pc.covariate_data[0][i])

        # Pass 2: sex is NOT used as a covariate.
        PhenoCovar.sex_as_covariate = False
        newpc = PhenoCovar()

        for line in self.ped:
            fam, ind, sex, ph = line.split()
            newpc.add_subject("%s:%s" % (fam, ind),
                              sex=int(sex),
                              phenotype=float(ph))

        # Test that sex wasn't loaded as a covariate due to the setting of PhenoCovar.sex_as_covariate
        self.assertEqual(0, len(newpc.covariate_data))
        self.assertEqual(1, len(newpc.phenotype_data))
        self.assertEqual(6, len(newpc.pedigree_data))
        self.assertEqual(6, len(newpc.phenotype_data[0]))

        # Verify the NEW object; the original re-checked `pc` here by mistake,
        # which left newpc's pedigree and phenotype contents untested.
        for i, line in enumerate(self.ped):
            fam, ind, sex, ph = line.split()
            iid = "%s:%s" % (fam, ind)
            self.assertEqual(newpc.pedigree_data[iid], i)
            self.assertAlmostEqual(float(ph), newpc.phenotype_data[0][i])
Esempio n. 6
0
    def test_tped_standardization2(self):
        """Compare raw and standardized phenotype/covariate values from a TPED dataset.

        The first pass installs ``NoStandardization`` and expects the raw
        values back from ``get_variables``; the second pass sets
        ``do_standardize_variables`` and installs ``Standardizer``, expecting
        the standardized values listed below. Sex is the only covariate.
        """
        DataParser.has_sex = True
        DataParser.has_pheno = True
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Use the builtin bool: the numpy.bool alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        raw_pheno = [
            0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0
        ]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        for pheno in pc:
            # get_variables receives the inverted mask (all False here).
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [
            -1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316,
            1.02837442, -1.61601695, -0.73455316, 1.02837442, -0.4407319,
            0.73455316, 1.02837442
        ]
        std_cov = [
            -0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678,
            -0.70710678, -0.70710678, -0.70710678, 1.41421356, 1.41421356,
            -0.70710678, -0.70710678
        ]
        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Esempio n. 7
0
    def test_tped_standardization_w_dbl_missing(self):
        """Check raw and standardized values with exclusions plus masked-out entries.

        "11:11" and "12:12" are listed in ``DataParser.ind_exclusions``, and
        the first two positions of the mask are additionally flagged before
        it is inverted and passed to ``get_variables``. Eight values are then
        compared for both the raw and the standardized pass.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.ind_exclusions = ["11:11", "12:12"]

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Use the builtin bool: the numpy.bool alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        nonmissing[0] = False
        nonmissing[1] = False
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
        raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]

        for pheno in pc:
            # The inverted mask is True only at positions 0 and 1.
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))
            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [
            1.19915853, -0.26322992, 0.90668084, 1.19915853, -1.43314068,
            -0.55570761, 1.19915853, -0.26322992
        ]
        std_cov = [
            1.22474487, 1.22474487, -0.81649658, -0.81649658, -0.81649658,
            -0.81649658, 1.22474487, 1.22474487
        ]
        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Esempio n. 8
0
    def testPedigreeIndExclusionsMissingComplete(self):
        """Parse the missing-data pedigree with "11:11" and "12:12" excluded.

        Only the first 10 entries of each expected genotype row are compared
        with ``snp.genotype_data``, and the map columns 0, 3 and 1 are checked
        against ``snp.chr``, ``snp.pos`` and ``snp.rsid``. Exactly 7 loci are
        expected.
        """
        DataParser.ind_exclusions = ["11:11", "12:12"]
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Read the map file through a context manager so the handle is closed.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [-1, -1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
            [-1, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [-1, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [-1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0]
        ]
        index = 0
        for snp in ped_parser:
            self.assertEqual(genotypes[index][0:10], list(snp.genotype_data))
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            index += 1
        self.assertEqual(7, index)
Esempio n. 9
0
    def testPedigreeNoFamId(self):
        DataParser.has_fid = False
        f = open(self.ped_filename, "w")
        f.write("""1 0 0 1 0.1 A A G T A A G G C T G T T T
2 0 0 1 0.4 A C G T G G C G T T G G C T
3 0 0 2 1.0 A A G G A G C C C C G T C T
4 0 0 2 0.5 A A G G A G C G C T G G T T
5 0 0 1 0.9 A C G G A A C G C C G G T T
6 0 0 1 1.0 A A G T A A G G C C G G T T
7 0 0 1 0.1 A A G T A A G G C T G G T T
8 0 0 1 0.4 A C G T G G C G T T G G C T
9 0 0 2 1.0 A A G G A G C C C C G T C T
10 0 0 2 0.5 A A G G A G C G C T G G T T
11 0 0 1 0.9 A C G G A A C G C C G G T T
12 0 0 1 1.0 A A G T A A G G C C G G T T""")

        f.close()
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 10
0
    def testPedigreeWithLiability(self):
        DataParser.has_liability = True
        f = open(self.ped_filename, "w")
        f.write("""1 1 0 0 1 0.1 1 A A G T A A G G C T G T T T
2 2 0 0 1 0.4 1 A C G T G G C G T T G G C T
3 3 0 0 2 1.0 1 A A G G A G C C C C G T C T
4 4 0 0 2 0.5 1 A A G G A G C G C T G G T T
5 5 0 0 1 0.9 1 A C G G A A C G C C G G T T
6 6 0 0 1 1.0 1 A A G T A A G G C C G G T T
7 7 0 0 1 0.1 1 A A G T A A G G C T G G T T
8 8 0 0 1 0.4 1 A C G T G G C G T T G G C T
9 9 0 0 2 1.0 1 A A G G A G C C C C G T C T
10 10 0 0 2 0.5 1 A A G G A G C G C T G G T T
11 11 0 0 1 0.9 1 A C G G A A C G C C G G T T
12 12 0 0 1 1.0 1 A A G T A A G G C C G G T T""")

        f.close()
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 11
0
    def testTPedLiability(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 0 0 1 0.1 1
2 2 0 0 1 0.4 1
3 3 0 0 2 1.0 1
4 4 0 0 2 0.5 1
5 5 0 0 1 0.9 1
6 6 0 0 1 1.0 1
7 7 0 0 1 0.1 1
8 8 0 0 1 0.4 1
9 9 0 0 2 1.0 1
10 10 0 0 2 0.5 1
11 11 0 0 1 0.9 1
12 12 0 0 1 1.0 1""")
        f.close()

        DataParser.has_liability = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [
            x.strip().split() for x in open(self.tped_filename).readlines()
        ]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 12
0
    def testTPedNoParents(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 1 0.1
2 2 1 0.4
3 3 2 1.0
4 4 2 0.5
5 5 1 0.9
6 6 1 1.0
7 7 1 0.1
8 8 1 0.4
9 9 2 1.0
10 10 2 0.5
11 11 1 0.9
12 12 1 1.0""")

        f.close()
        DataParser.has_parents = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [
            x.strip().split() for x in open(self.tped_filename).readlines()
        ]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 13
0
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Restrict a TPED dataset to the rs0005-rs0007 range with rs0007 excluded.

        Expected values start at row 4 of the TPED file, and the final
        assertion (index == 6) requires the parser to yield exactly two loci.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read the TPED rows through a context manager so the handle is closed.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        # First check the locus summaries returned by get_loci().
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            index += 1

        # Then check full iteration, including rsid and genotypes.
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)
Esempio n. 14
0
    def testTPedNoParentsPheno(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 1
2 2 1
3 3 2
4 4 2
5 5 1
6 6 1
7 7 1
8 8 1
9 9 2
10 10 2
11 11 1
12 12 1""")

        f.close()

        DataParser.has_parents = False
        DataParser.has_pheno = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = libgwas.get_lines(self.tped_filename, split=True)
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 15
0
    def testMissingComplete(self):
        """Walk the missing-data pedigree and verify every locus is usable.

        For each (locus, phenotype) pair the non-missing mask from
        ``get_variables`` is fed to ``get_genotype_data``. The test requires
        7 fully verified pairs and no ``TooMuchMissing`` errors.
        """
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename,
                                    self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        rejected = 0
        verified = 0
        locus_idx = 0
        for snp in ped_parser:
            for y in pc:
                # get_variables stays outside the try: its errors must propagate.
                variables = y.get_variables(snp.missing_genotypes)
                mask = variables[2]
                try:
                    genodata = snp.get_genotype_data(mask)
                    expected_genotypes = self.missing_genotypes[locus_idx]
                    self.assertEqual(expected_genotypes,
                                     list(genodata.genotypes))
                    map_row = mapdata[locus_idx]
                    self.assertEqual(int(map_row[0]), snp.chr)
                    self.assertEqual(int(map_row[3]), snp.pos)
                    self.assertEqual(map_row[1], snp.rsid)
                    verified += 1
                except TooMuchMissing:
                    rejected += 1
                except InvalidFrequency:
                    pass
            locus_idx += 1
        self.assertEqual(0, rejected)
        self.assertEqual(7, verified)
Esempio n. 16
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        with open(self.pheno_file, "w") as f:
            f.write("""FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        with open(self.pheno_file) as f2:
            pc.load_phenofile(f2, indices=[3])
            index = 0
            mapdata = get_lines(self.map_filename, split=True)

            with self.assertRaises(InvariantVar):
                for snp in ped_parser:
                    for y in pc:
                        non_missing = numpy.ones(len(snp.genotype_data),
                                                 dtype=bool)
                        (pheno, covariates, nonmissing) = y.get_variables(
                            numpy.invert(non_missing))
Esempio n. 17
0
    def testPedigreeNoPheno(self):
        DataParser.has_pheno = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 1 0 0 1 A A G T A A G G C T G T T T
2 2 0 0 1 A C G T G G C G T T G G C T
3 3 0 0 2 A A G G A G C C C C G T C T
4 4 0 0 2 A A G G A G C G C T G G T T
5 5 0 0 1 A C G G A A C G C C G G T T
6 6 0 0 1 A A G T A A G G C C G G T T
7 7 0 0 1 A A G T A A G G C T G G T T
8 8 0 0 1 A C G T G G C G T T G G C T
9 9 0 0 2 A A G G A G C C C C G T C T
10 10 0 0 2 A A G G A G C G C T G G T T
11 11 0 0 1 A C G G A A C G C C G G T T
12 12 0 0 1 A A G T A A G G C C G G T T""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 18
0
    def testTpedBounded(self):
        """Run the analysis on a TPED dataset bounded to bp 2000-3000 on chromosome 1.

        The first result must sit at chr 1, pos 2000; the first two results
        are then compared with reference statistics to 6 decimal places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck(bp=[2000,3000])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        self.assertEqual(1, results[0].chr)
        self.assertEqual(2000, results[0].pos)

        # One row per result: p_mvtest, then (beta, stderr, p-value) for
        # coefficient index 1 and for coefficient index 3.
        expected = [
            (0.57778118,
             (0.02798537, 0.033790691857, 0.40755865),
             (0.03275892, 0.0475661, 0.49101013)),
            (0.44661276,
             (0.01663975, 0.03443300, 0.62891811),
             (0.05712017, 0.04783608, 0.232446188)),
        ]
        for pos, (p_mvtest, coef1, coef3) in enumerate(expected):
            result = results[pos]
            self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
            for coef_idx, (beta, stderr, pvalue) in ((1, coef1), (3, coef3)):
                self.assertAlmostEqual(beta, result.betas[coef_idx], places=6)
                self.assertAlmostEqual(stderr, result.beta_stderr[coef_idx],
                                       places=6)
                self.assertAlmostEqual(pvalue, result.beta_pvalues[coef_idx],
                                       places=6)
Esempio n. 19
0
    def testTpedAnalysis(self):
        """Run the analysis on a TPED dataset and check the first three results.

        ``BoundaryCheck()`` is built without arguments and
        ``BoundaryCheck.chrom`` is set to 1. Reference statistics are compared
        to 6 decimal places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # One row per result: p_mvtest, then (beta, stderr, p-value) for
        # coefficient index 1 and for coefficient index 3.
        expected = [
            (0.0034756155,
             (0.1134684009, 0.0337649965541, 0.0007779211),
             (-0.0033479839, 0.0492050029324, 0.9457525716)),
            (0.57778118,
             (0.02798537, 0.033790691857, 0.40755865),
             (0.03275892, 0.0475661, 0.49101013)),
            (0.44661276,
             (0.01663975, 0.03443300, 0.62891811),
             (0.05712017, 0.04783608, 0.232446188)),
        ]
        for pos, (p_mvtest, coef1, coef3) in enumerate(expected):
            result = results[pos]
            self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
            for coef_idx, (beta, stderr, pvalue) in ((1, coef1), (3, coef3)):
                self.assertAlmostEqual(beta, result.betas[coef_idx], places=6)
                self.assertAlmostEqual(stderr, result.beta_stderr[coef_idx],
                                       places=6)
                self.assertAlmostEqual(pvalue, result.beta_pvalues[coef_idx],
                                       places=6)
Esempio n. 20
0
    def testBedAnalysis(self):
        """Run the analysis on the non-missing BED dataset and check nine results.

        ``BoundaryCheck()`` is built without arguments and
        ``BoundaryCheck.chrom`` is set to 1. For each result ``p_mvtest`` and
        ``lmpv`` are compared with reference values to 6 decimal places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pheno)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(ped_parser, pheno))

        # (p_mvtest, lmpv) expected for results[0] .. results[8], in order.
        expected = [
            (0.00347562, 0.00085539),
            (0.5777812, 0.42212155),
            (0.44661276, 0.61386344),
            (0.13555597, 0.59682217),
            (0.54029842, 0.60475964),
            (0.03547514, 0.86663730),
            (0.79249216, 0.67678089),
            (0.20973300, 0.14431260),
            (0.81471528, 0.56378497),
        ]
        for pos, (p_mvtest, lmpv) in enumerate(expected):
            self.assertAlmostEqual(p_mvtest, results[pos].p_mvtest, places=6)
            self.assertAlmostEqual(lmpv, results[pos].lmpv, places=6)
Esempio n. 21
0
    def testBedAnalysisCov(self):
        """Run the BED analysis with sex as a covariate and check nine results.

        ``p_mvtest`` is always compared to 6 decimal places; ``lmpv`` is
        compared to 6 places for the first result and 5 places for the rest.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pheno)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(ped_parser, pheno))

        # (p_mvtest, lmpv, lmpv places) for results[0] .. results[8], in order.
        expected = [
            (0.0034238, 0.0143949, 6),
            (0.58495059, 0.65786, 5),
            (0.45178985, 0.83956, 5),
            (0.133661, 0.82169, 5),
            (0.541391, 0.83595, 5),
            (0.035665, 0.94900, 5),
            (0.784660, 0.59324, 5),
            (0.2137434, 0.18069, 5),
            (0.8160148, 0.79734, 5),
        ]
        for pos, (p_mvtest, lmpv, lmpv_places) in enumerate(expected):
            self.assertAlmostEqual(p_mvtest, results[pos].p_mvtest, places=6)
            self.assertAlmostEqual(lmpv, results[pos].lmpv,
                                   places=lmpv_places)
Esempio n. 22
0
    def testPedSnpBoundaryTPed(self):
        """Restrict a transposed pedigree to the SNP range rs0001-rs0003.

        With ``BoundaryCheck.chrom`` set to 1, three loci are expected both
        from ``locus_count`` and from iterating the parser, and each must agree
        with the corresponding row of the tped file (starting at row 0).
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
        BoundaryCheck.chrom = 1
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        # First pass: locus metadata as reported by get_loci().
        for row_idx, locus in enumerate(parser.get_loci()):
            self.assertEqual(int(tped_rows[row_idx][0]), locus.chr)
            self.assertEqual(int(tped_rows[row_idx][3]), locus.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[row_idx],
                                   locus.hetero_freq,
                                   places=4)
        self.assertEqual(3, parser.locus_count)

        # Second pass: iterate the parser itself and compare the genotypes.
        visited = 0
        for visited, snp in enumerate(parser, start=1):
            row_idx = visited - 1
            self.assertEqual(int(tped_rows[row_idx][0]), snp.chr)
            self.assertEqual(int(tped_rows[row_idx][3]), snp.pos)
            self.assertEqual(tped_rows[row_idx][1], snp.rsid)
            self.assertEqual(self.genotypes[row_idx],
                             list(snp.genotype_data))
        self.assertEqual(3, visited)
Esempio n. 23
0
    def testRegionBoundaryWithExclusions(self):
        """Iterate a transposed pedigree with three individuals excluded.

        ``BoundaryCheck.chrom`` is set to 2 and individuals "1:1", "2:2" and
        "3:3" are excluded. The loci yielded by the parser are compared with
        tped rows 4 onward and with the local ``genotypes`` table; the final
        index must be 7.
        """
        DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
        genotypes = [[0, 1, 0, 0, 1, 0, 0, 1, 0], [0, 0, 1, 1, 1, 0, 0, 0, 1],
                     [1, 0, 0, 0, 2, 1, 1, 0, 0], [1, 1, 0, 0, 1, 2, 1, 1, 0],
                     [1, 0, 0, 1, 2, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0],
                     [0, 0, 0, 0, 1, 1, 0, 0, 0]]

        BoundaryCheck.chrom = 2
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        # Comparison starts at the fifth tped row.
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 24
0
    def testTPedNoFamIDSex(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 0 0 0.1
2 0 0 0.4
3 0 0 1.0
4 0 0 0.5
5 0 0 0.9
6 0 0 1.0
7 0 0 0.1
8 0 0 0.4
9 0 0 1.0
10 0 0 0.5
11 0 0 0.9
12 0 0 1.0""")

        f.close()

        DataParser.has_fid = False
        DataParser.has_sex = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = libgwas.get_lines(self.tped_filename, split=True)
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 25
0
    def testPedigreeNoFamSexOrParents(self):
        DataParser.has_fid = False
        DataParser.has_sex = False
        DataParser.has_parents = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 0.1 A A G T A A G G C T G T T T
2 0.4 A C G T G G C G T T G G C T
3 1.0 A A G G A G C C C C G T C T
4 0.5 A A G G A G C G C T G G T T
5 0.9 A C G G A A C G C C G G T T
6 1.0 A A G T A A G G C C G G T T
7 0.1 A A G T A A G G C T G G T T
8 0.4 A C G T G G C G T T G G C T
9 1.0 A A G G A G C C C C G T C T
10 0.5 A A G G A G C G C T G G T T
11 0.9 A C G G A A C G C C G G T T
12 1.0 A A G T A A G G C C G G T T""")

        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 26
0
    def testPedWithMissingMxIndExclusionsToo(self):
        """Combine explicit individual exclusions with a missingness tolerance.

        Individuals "2:2" and "3:3" are excluded and
        ``DataParser.ind_miss_tol`` is 0.5 when loading the tped file that
        contains missing calls. Each of the seven loci must match the tped row
        and the local ``genotypes_w_missing`` table.
        """
        pc = PhenoCovar()
        DataParser.ind_exclusions = ["2:2", "3:3"]
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        genotypes_w_missing = [[0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
                               [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
                               [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
                               [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
                               [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
                               [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                               [0, 0, 0, 0, 0, 1, 1, 0, 0, 0]]
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index],
                             list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 27
0
 def testPedigreeIndExclusionsMissingIndThresh(self):
     """Exclude two individuals and apply a missingness tolerance on a ped file.

     Individuals "11:11" and "12:12" are excluded and
     ``DataParser.ind_miss_tol`` is 0.5. For each of the seven loci, the
     first nine entries of the expected row are compared with the parser's
     genotype data.
     """
     pheno_covar = PhenoCovar()
     DataParser.ind_exclusions = ["11:11", "12:12"]
     DataParser.ind_miss_tol = 0.5  # We should only lose 1
     parser = PedigreeParser(self.map_filename,
                             self.ped_filename_missing)
     parser.load_mapfile()
     parser.load_genotypes(pheno_covar)
     map_rows = get_lines(self.map_filename, split=True)
     expected = [[1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
                 [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
                 [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
                 [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
                 [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
                 [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0]]
     # A single counter indexes both the map rows and the expected table.
     visited = 0
     for snp in parser:
         self.assertEqual(int(map_rows[visited][0]), snp.chr)
         self.assertEqual(int(map_rows[visited][3]), snp.pos)
         self.assertEqual(map_rows[visited][1], snp.rsid)
         self.assertEqual(expected[visited][:9], list(snp.genotype_data))
         visited += 1
     self.assertEqual(7, visited)
Esempio n. 28
0
    def testTPedPhenoComplete(self):
        """Load a complete transposed pedigree with sex as a covariate.

        After loading, the first covariate and phenotype rows must each hold
        12 entries and there must be one phenotype name. The parser must
        report seven loci, each matching the tped rows and ``self.genotypes``.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        self.assertEqual(12, len(pc.covariate_data[0]))
        self.assertEqual(12, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        index = 0
        self.assertEqual(7, ped_parser.locus_count)
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Esempio n. 29
0
    def testMissingSnpThresh(self):
        """Apply a SNP missingness tolerance of 0.5 to the ped file with gaps.

        Every locus is paired with every entry yielded by the ``PhenoCovar``
        object. Exactly one ``TooMuchMissing`` is expected across the seven
        loci; ``InvalidFrequency`` is tolerated and ignored.
        """
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = PedigreeParser(self.map_filename,
                                self.ped_filename_missing)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        rejected = 0
        locus_idx = 0
        for snp in parser:
            for variables in pheno_covar:
                # Only the non-missing mask is needed here.
                _, _, keep_mask = variables.get_variables(
                    snp.missing_genotypes)
                try:
                    geno = snp.get_genotype_data(keep_mask)

                    self.assertEqual(int(map_rows[locus_idx][0]), snp.chr)
                    self.assertEqual(int(map_rows[locus_idx][3]), snp.pos)
                    self.assertEqual(map_rows[locus_idx][1], snp.rsid)
                    self.assertEqual(self.missing_genotypes[locus_idx],
                                     list(geno.genotypes))
                except TooMuchMissing:
                    rejected += 1
                except InvalidFrequency:
                    pass
            # The locus counter advances once per SNP, not per phenotype.
            locus_idx += 1
        self.assertEqual(1, rejected)
        self.assertEqual(7, locus_idx)
Esempio n. 30
0
    def testPedSnpBoundary2TPed(self):
        """Restrict a transposed pedigree to the SNP range rs0005-rs0006.

        With ``BoundaryCheck.chrom`` set to 2, two loci are expected. They are
        compared with tped rows 4 and 5, first via ``get_loci()`` and then by
        iterating the parser.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        # Comparison starts at the fifth tped row.
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index],
                                   snp.hetero_freq,
                                   places=4)
            index += 1
        self.assertEqual(2, ped_parser.locus_count)
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)