Ejemplo n.º 1
0
    def setUp(self):
        """Locate the bed/bim/fam fixtures, define expected data and record settings.

        Class-level parser settings are copied onto the test instance
        (presumably so a tearDown outside this view can restore them) before
        a fresh BoundaryCheck is installed on DataParser.
        """

        def packaged(relative_path):
            # Fixture paths are resolved through the libgwas package resources.
            return resource_filename("libgwas", relative_path)

        self.missing = "tests/bedfiles/ped_missing"
        self.missing_bed = packaged("%s.bed" % (self.missing))
        self.missing_bim = packaged("%s.bim" % (self.missing))
        self.missing_fam = packaged("%s.fam" % (self.missing))

        # Expected genotype vectors, one row per locus.
        self.genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        self.nonmissing = "tests/bedfiles/ped_nomiss"
        self.nonmissing_bed = packaged("%s.bed" % (self.nonmissing))
        self.nonmissing_bim = packaged("%s.bim" % (self.nonmissing))
        self.nonmissing_fam = packaged("%s.fam" % (self.nonmissing))

        # Expected genotype vectors for the fixture containing missing calls.
        self.genotypes_w_missing = [
            [0, 1],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        self.nonmissing_mapdata = libgwas.get_lines(self.nonmissing_bim,
                                                    split=True)
        self.missing_mapdata = libgwas.get_lines(self.missing_bim, split=True)

        self.phenotypes = [
            0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0
        ]
        self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        # Snapshot the current class-level settings onto the instance.
        self.chrom = BoundaryCheck.chrom
        for setting in ("boundary", "min_maf", "max_maf", "snp_miss_tol",
                        "ind_miss_tol"):
            setattr(self, setting, getattr(DataParser, setting))
        self.sex_as_covar = PhenoCovar.sex_as_covariate
        for setting in ("has_sex", "has_pheno", "has_parents", "has_fid",
                        "has_liability"):
            setattr(self, setting, getattr(DataParser, setting))

        DataParser.boundary = BoundaryCheck()
Ejemplo n.º 2
0
    def testPedWithMissingMxIndExclusionsToo(self):
        """Check tped loci with two excluded individuals and ind_miss_tol of 0.5."""
        pheno_covar = PhenoCovar()
        DataParser.ind_exclusions = ["2:2", "3:3"]
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)
        # Expected genotype vector for each locus, in file order.
        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
            [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        loci_seen = 0
        for row, locus in enumerate(parser):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(expected_genotypes[row],
                             list(locus.genotype_data))
            loci_seen = row + 1
        self.assertEqual(7, loci_seen)
Ejemplo n.º 3
0
    def testCmdLinePedigreeWithLiability(self):
        f = open(self.ped_filename, "w")
        f.write("""1 1 0 0 1 0.1 1 A A G T A A G G C T G T T T
2 2 0 0 1 0.4 1 A C G T G G C G T T G G C T
3 3 0 0 2 1.0 1 A A G G A G C C C C G T C T
4 4 0 0 2 0.5 1 A A G G A G C G C T G G T T
5 5 0 0 1 0.9 1 A C G G A A C G C C G G T T
6 6 0 0 1 1.0 1 A A G T A A G G C C G G T T
7 7 0 0 1 0.1 1 A A G T A A G G C T G G T T
8 8 0 0 1 0.4 1 A C G T G G C G T T G G C T
9 9 0 0 2 1.0 1 A A G G A G C C C C G T C T
10 10 0 0 2 0.5 1 A A G G A G C G C T G G T T
11 11 0 0 1 0.9 1 A C G G A A C G C C G G T T
12 12 0 0 1 1.0 1 A A G T A A G G C C G G T T""")

        f.close()
        cmds = "--file %s --liability" % (self.ped_filename.split(".")[0])
        app = mvtest.MVTestApplication()
        ped_parser, pc, args = app.LoadCmdLine(cmds.split(" "))

        mapdata = libgwas.get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 4
0
    def testPedigreeNoPheno(self):
        DataParser.has_pheno = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 1 0 0 1 A A G T A A G G C T G T T T
2 2 0 0 1 A C G T G G C G T T G G C T
3 3 0 0 2 A A G G A G C C C C G T C T
4 4 0 0 2 A A G G A G C G C T G G T T
5 5 0 0 1 A C G G A A C G C C G G T T
6 6 0 0 1 A A G T A A G G C C G G T T
7 7 0 0 1 A A G T A A G G C T G G T T
8 8 0 0 1 A C G T G G C G T T G G C T
9 9 0 0 2 A A G G A G C C C C G T C T
10 10 0 0 2 A A G G A G C G C T G G T T
11 11 0 0 1 A C G G A A C G C C G G T T
12 12 0 0 1 A A G T A A G G C C G G T T""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 5
0
    def setUp(self):
        """Write the test files, read the pedigree and record class-level settings.

        Current DataParser / PhenoCovar / standardizer settings are copied
        onto the instance (presumably for a tearDown outside this view),
        after which the boundary, the individual inclusion/exclusion lists
        and the standardizer are reset.
        """
        self.WriteTestFiles()
        self.ped = get_lines(self.ped_filename)

        # Both sequences are a six-value pattern repeated for 12 individuals.
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 1, 2, 2, 1, 1] * 2

        self.chrom = BoundaryCheck.chrom
        # Keep the previous boundary before swapping in a fresh one.
        self.boundary = DataParser.boundary
        DataParser.boundary = BoundaryCheck()
        for setting in ("min_maf", "max_maf", "snp_miss_tol", "ind_miss_tol"):
            setattr(self, setting, getattr(DataParser, setting))
        DataParser.ind_exclusions = []
        DataParser.ind_inclusions = []
        self.sex_as_covar = PhenoCovar.sex_as_covariate
        for setting in ("has_sex", "has_pheno", "has_parents", "has_fid",
                        "has_liability"):
            setattr(self, setting, getattr(DataParser, setting))
        self.sex_as_covariate = PhenoCovar.sex_as_covariate

        standardizer_module = libgwas.standardizer
        self.standardizer = standardizer_module.get_standardizer()
        standardizer_module.set_standardizer(
            standardizer_module.NoStandardization)
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        with open(self.pheno_file, "w") as f:
            f.write("""FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        with open(self.pheno_file) as f2:
            pc.load_phenofile(f2, indices=[3])
            index = 0
            mapdata = get_lines(self.map_filename, split=True)

            with self.assertRaises(InvariantVar):
                for snp in ped_parser:
                    for y in pc:
                        non_missing = numpy.ones(len(snp.genotype_data),
                                                 dtype=bool)
                        (pheno, covariates, nonmissing) = y.get_variables(
                            numpy.invert(non_missing))
Ejemplo n.º 7
0
 def testPedigreeIndExclusionsMissingIndThresh(self):
     """Check loci with two excluded individuals and ind_miss_tol of 0.5."""
     pheno_covar = PhenoCovar()
     DataParser.ind_exclusions = ["11:11", "12:12"]
     DataParser.ind_miss_tol = 0.5  # We should only lose 1
     parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
     parser.load_mapfile()
     parser.load_genotypes(pheno_covar)
     map_rows = get_lines(self.map_filename, split=True)
     expected_genotypes = [
         [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
         [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
         [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
         [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
         [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
         [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
     ]

     loci_seen = 0
     for row, locus in enumerate(parser):
         fields = map_rows[row]
         self.assertEqual(int(fields[0]), locus.chr)
         self.assertEqual(int(fields[3]), locus.pos)
         self.assertEqual(fields[1], locus.rsid)
         # Only the first nine expected values are compared per locus.
         self.assertEqual(expected_genotypes[row][:9],
                          list(locus.genotype_data))
         loci_seen = row + 1
     self.assertEqual(7, loci_seen)
Ejemplo n.º 8
0
    def testPedCmdLineMIND2(self):
        """Load a pedigree with --mind=0.10 and check the loci that survive.

        Loci whose genotype extraction raises TooMuchMissing,
        InvalidFrequency or TooMuchMissingpPhenoCovar are not counted; the
        rest are compared with the expected genotypes and the map file.
        """
        cmds = "--ped %s --map %s --mind=0.10" % (self.ped_filename_missing,
                                                  self.map_filename)

        app = mvtest.MVTestApplication()
        # Only the dataset is inspected; the other two return values are
        # unused (the original bound one of them to `vars`, shadowing the
        # builtin).
        dataset, _pheno_covar, _args = app.LoadCmdLine(cmds.split(" "))

        genotypes = [[0, 0, 1, 0], [1, 0, 0, 1], [0, 1, 0, 0], [0, 1, 1, 0],
                     [0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]

        mapdata = libgwas.get_lines(self.map_filename, split=True)

        index = 0
        # Must be initialised before the loop: the InvalidFrequency handler
        # increments it, which previously raised UnboundLocalError.
        skipped = 0
        for snp in dataset:
            # All-True boolean mask, one entry per missing_genotypes element.
            snp_filter = numpy.ones(snp.missing_genotypes.shape[0]) == 1
            try:
                genodata = snp.get_genotype_data(snp_filter)
                self.assertEqual(genotypes[index], list(genodata.genotypes))
                self.assertEqual(int(mapdata[index][0]), snp.chr)
                self.assertEqual(int(mapdata[index][3]), snp.pos)
                self.assertEqual(mapdata[index][1], snp.rsid)
                index += 1
            except TooMuchMissing:
                # Deliberately ignored: such loci are simply not counted.
                pass
            except InvalidFrequency:
                skipped += 1
            except TooMuchMissingpPhenoCovar:
                pass
        self.assertEqual(5, index)  # Last two are fixed
Ejemplo n.º 9
0
    def testMissingSnpThresh(self):
        """With snp_miss_tol at 0.5, expect exactly one TooMuchMissing over 7 loci."""
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        # Disabled check kept for reference:
        # self.assertEqual([0,1,0,0,0,0,0,0,0,0,0,0], list(ped_parser.individual_mask))
        map_rows = get_lines(self.map_filename, split=True)

        loci_seen = 0
        too_much_missing = 0
        for row, locus in enumerate(parser):
            fields = map_rows[row]
            for variables in pheno_covar:
                (pheno, covars, nonmissing) = variables.get_variables(
                    locus.missing_genotypes)
                try:
                    genodata = locus.get_genotype_data(nonmissing)

                    self.assertEqual(int(fields[0]), locus.chr)
                    self.assertEqual(int(fields[3]), locus.pos)
                    self.assertEqual(fields[1], locus.rsid)
                    self.assertEqual(self.missing_genotypes[row],
                                     list(genodata.genotypes))
                except TooMuchMissing:
                    too_much_missing += 1
                except InvalidFrequency:
                    pass
            loci_seen = row + 1
        self.assertEqual(1, too_much_missing)
        self.assertEqual(7, loci_seen)
Ejemplo n.º 10
0
    def testMissingComplete(self):
        """Expect all seven loci to yield genotype data without TooMuchMissing."""
        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        too_much_missing = 0
        validated = 0
        for row, locus in enumerate(parser):
            fields = map_rows[row]
            for variables in pheno_covar:
                (pheno, covars, nonmissing) = variables.get_variables(
                    locus.missing_genotypes)
                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    self.assertEqual(self.missing_genotypes[row],
                                     list(genodata.genotypes))
                    self.assertEqual(int(fields[0]), locus.chr)
                    self.assertEqual(int(fields[3]), locus.pos)
                    self.assertEqual(fields[1], locus.rsid)
                    validated += 1
                except TooMuchMissing:
                    too_much_missing += 1
                except InvalidFrequency:
                    pass
        self.assertEqual(0, too_much_missing)
        self.assertEqual(7, validated)
Ejemplo n.º 11
0
    def testTPedNoParentsPheno(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 1
2 2 1
3 3 2
4 4 2
5 5 1
6 6 1
7 7 1
8 8 1
9 9 2
10 10 2
11 11 1
12 12 1""")

        f.close()

        DataParser.has_parents = False
        DataParser.has_pheno = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = libgwas.get_lines(self.tped_filename, split=True)
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 12
0
    def testCmdLinePedigreeNoFamSexOrParents(self):
        f = open(self.ped_filename, "w")
        f.write("""1 0.1 A A G T A A G G C T G T T T
2 0.4 A C G T G G C G T T G G C T
3 1.0 A A G G A G C C C C G T C T
4 0.5 A A G G A G C G C T G G T T
5 0.9 A C G G A A C G C C G G T T
6 1.0 A A G T A A G G C C G G T T
7 0.1 A A G T A A G G C T G G T T
8 0.4 A C G T G G C G T T G G C T
9 1.0 A A G G A G C C C C G T C T
10 0.5 A A G G A G C G C T G G T T
11 0.9 A C G G A A C G C C G G T T
12 1.0 A A G T A A G G C C G G T T""")

        f.close()
        cmds = "--file %s --no-parents --no-fid --no-sex" % (
            self.ped_filename.split(".")[0])
        app = mvtest.MVTestApplication()
        ped_parser, pc, args = app.LoadCmdLine(cmds.split(" "))

        mapdata = libgwas.get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 13
0
    def testTPedNoFamIDSex(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 0 0 0.1
2 0 0 0.4
3 0 0 1.0
4 0 0 0.5
5 0 0 0.9
6 0 0 1.0
7 0 0 0.1
8 0 0 0.4
9 0 0 1.0
10 0 0 0.5
11 0 0 0.9
12 0 0 1.0""")

        f.close()

        DataParser.has_fid = False
        DataParser.has_sex = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = libgwas.get_lines(self.tped_filename, split=True)
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 14
0
    def testTPedLiability(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 0 0 1 0.1 1
2 2 0 0 1 0.4 1
3 3 0 0 2 1.0 1
4 4 0 0 2 0.5 1
5 5 0 0 1 0.9 1
6 6 0 0 1 1.0 1
7 7 0 0 1 0.1 1
8 8 0 0 1 0.4 1
9 9 0 0 2 1.0 1
10 10 0 0 2 0.5 1
11 11 0 0 1 0.9 1
12 12 0 0 1 1.0 1""")
        f.close()

        DataParser.has_liability = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = libgwas.get_lines(self.tped_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 15
0
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Check a SNP-range boundary (rs0005-rs0007) with rs0007 excluded."""
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)
        # Expected values are read starting from the fifth tped row.
        first_row = 4

        for row, locus in enumerate(parser.get_loci(), first_row):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)

        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(6, next_row)
Ejemplo n.º 16
0
    def testPedSnpBoundary2TPed(self):
        """Check a SNP-range boundary (rs0005-rs0006) on a transposed pedigree."""
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)
        # Expected values are read starting from the fifth tped row.
        first_row = 4

        for row, locus in enumerate(parser.get_loci(), first_row):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[row],
                                   locus.hetero_freq,
                                   places=4)
        self.assertEqual(2, parser.locus_count)

        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(6, next_row)
Ejemplo n.º 17
0
    def testPedigreeNoFamSexOrParents(self):
        DataParser.has_fid = False
        DataParser.has_sex = False
        DataParser.has_parents = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 0.1 A A G T A A G G C T G T T T
2 0.4 A C G T G G C G T T G G C T
3 1.0 A A G G A G C C C C G T C T
4 0.5 A A G G A G C G C T G G T T
5 0.9 A C G G A A C G C C G G T T
6 1.0 A A G T A A G G C C G G T T
7 0.1 A A G T A A G G C T G G T T
8 0.4 A C G T G G C G T T G G C T
9 1.0 A A G G A G C C C C G T C T
10 0.5 A A G G A G C G C T G G T T
11 0.9 A C G G A A C G C C G G T T
12 1.0 A A G T A A G G C C G G T T""")

        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 18
0
    def testPedWithMissingMxSnpComplete(self):
        """With snp_miss_tol at 0.5, expect one TooMuchMissing and six valid loci."""
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

        # Expected per-locus genotype vectors and heterozygote frequencies.
        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        expected_hetero = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(7, parser.locus_count)

        # First pass: the locus list reported by the parser.
        listed = 0
        for row, locus in enumerate(parser.get_loci()):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            listed = row + 1
        self.assertEqual(7, listed)

        # Second pass: iterate the parser and extract genotype data.
        loci_seen = 0
        too_much_missing = 0
        validated = 0
        for row, locus in enumerate(parser):
            fields = tped_rows[row]
            for variables in pheno_covar:
                (pheno, covars, nonmissing) = variables.get_variables(
                    locus.missing_genotypes)
                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    self.assertEqual(int(fields[0]), locus.chr)
                    self.assertEqual(int(fields[3]), locus.pos)
                    self.assertEqual(fields[1], locus.rsid)
                    self.assertEqual(expected_genotypes[row],
                                     list(locus.genotype_data))
                    self.assertAlmostEqual(expected_hetero[row],
                                           genodata.hetero_freq,
                                           places=4)

                    validated += 1
                except TooMuchMissing:
                    too_much_missing += 1
                except InvalidFrequency:
                    pass
            loci_seen = row + 1
        self.assertEqual(1, too_much_missing)
        self.assertEqual(6, validated)
        self.assertEqual(7, loci_seen)
Ejemplo n.º 19
0
    def testPedNegativePositions(self):
        """Parse with map_miss_filename and compare loci from the third map row on."""
        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        # Expected values come from the regular map file, not the one parsed.
        map_rows = get_lines(self.map_filename, split=True)

        first_row = 2
        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = map_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(7, next_row)
Ejemplo n.º 20
0
    def testCmdLineNoExternalVars(self):
        """Load a dataset with --sex and check the covariate/phenotype data.

        Expects a single covariate labelled "SEX", a single phenotype named
        "Pheno-1", phenotype values matching self.dummy_pheno and a covariate
        value of 1 for every individual.
        """
        # --file takes the dataset prefix, i.e. the filename up to its first dot.
        cmds = "--file %s --sex" % (self.ped_filename.split(".")[0])
        app = mvtest.MVTestApplication()
        # Only the phenotype/covariate container is inspected in this test.
        _parser, pc, _args = app.LoadCmdLine(cmds.split(" "))

        c = pc.covariate_data
        y = pc.phenotype_data

        self.assertEqual(["SEX"], pc.covariate_labels)
        self.assertEqual(["Pheno-1"], pc.phenotype_names)

        for i in range(len(y[0])):
            self.assertAlmostEqual(self.dummy_pheno[i], y[0][i])
            self.assertAlmostEqual(1, c[0][i])
Ejemplo n.º 21
0
    def testPedNegativePositions(self):
        """Parse misssnp_tped_filename and compare loci from the third tped row on."""
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.misssnp_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)

        first_row = 2
        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(7, next_row)
Ejemplo n.º 22
0
    def testWithFewIndividuals(self):
        """Parse miniped_filename and expect three genotype values per locus."""
        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_filename, self.miniped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        expected_genotypes = [
            [0, 1, 0],
            [1, 1, 0],
            [0, 2, 1],
            [0, 1, 1],
            [1, 0, 1],
            [1, 0, 1],
            [0, 1, 1],
        ]
        loci_seen = 0
        for row, locus in enumerate(parser):
            fields = map_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(expected_genotypes[row],
                             list(locus.genotype_data))
            loci_seen = row + 1
        self.assertEqual(7, loci_seen)
Ejemplo n.º 23
0
    def testPedComplete(self):
        """Parse the full transposed pedigree: 12 individuals, 7 matching loci."""
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        self.assertEqual(12, parser.ind_count)
        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        loci_seen = 0
        for row, locus in enumerate(parser):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            loci_seen = row + 1
        self.assertEqual(7, loci_seen)
Ejemplo n.º 24
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Restrict to chromosome 1, exclude rs0004 and expect a single locus."""
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        # Expected values come from the regular map file, not the one parsed.
        map_rows = get_lines(self.map_filename, split=True)

        first_row = 2
        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = map_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(3, next_row)
Ejemplo n.º 25
0
    def testPedigreeIndExclusionsComplete(self):
        """Exclude individuals 11:11 and 12:12 and compare the first ten genotypes."""
        DataParser.ind_exclusions = ["11:11", "12:12"]
        pheno_covar = PhenoCovar()

        parser = PedigreeParser(self.map_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        loci_seen = 0
        for row, locus in enumerate(parser):
            fields = map_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row][:10],
                             list(locus.genotype_data))
            loci_seen = row + 1
        self.assertEqual(7, loci_seen)
Ejemplo n.º 26
0
    def testPedBoundary(self):
        """Restrict to chromosome 2 and compare loci from the fifth map row on."""
        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_filename, self.ped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        first_row = 4
        next_row = first_row
        for row, locus in enumerate(parser, first_row):
            fields = map_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            next_row = row + 1
        self.assertEqual(7, next_row)
Ejemplo n.º 27
0
    def testCmdLinePhenoWithNames(self):
        """Select a phenotype and covariates by name via the command line.

        Loads BMI as the phenotype and SEX,AGE as covariates from
        self.pheno_covar, then compares labels and values with
        self.phenotype_data and self.covariate_data.
        """
        cmds = "--file %s --pheno %s --pheno-names=BMI --covar %s --covar-names SEX,AGE" % (
            self.ped_filename.split(".")[0], self.pheno_covar,
            self.pheno_covar)
        app = mvtest.MVTestApplication()
        # Only the phenotype/covariate container is inspected in this test.
        _parser, pc, _args = app.LoadCmdLine(cmds.split(" "))

        c = pc.covariate_data
        y = pc.phenotype_data

        self.assertEqual(["SEX", "AGE"], pc.covariate_labels)
        self.assertEqual(["BMI"], pc.phenotype_names)

        for i in range(len(y[0])):
            self.assertAlmostEqual(self.phenotype_data[i], y[0][i])
            self.assertAlmostEqual(self.covariate_data[0][i], c[0][i])
            self.assertAlmostEqual(self.covariate_data[1][i], c[1][i])
Ejemplo n.º 28
0
    def testPedMultiPheno(self):
        """Load two phenotype columns and check them for every locus.

        Columns are selected with indices=[2, 3] from self.pheno_file. For
        each locus the individual at position 6 is masked out before calling
        get_variables, and the first 11 returned sex/phenotype values are
        compared with the expected lists.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        with open(self.pheno_file) as f:
            pc.load_phenofile(f, indices=[2, 3])
        mapdata = get_lines(self.map_filename, split=True)

        # Expected values, 11 per list.
        sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
        dual_pheno = [[1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
                      [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2]]

        self.assertEqual(2, len(pc.phenotype_data))
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            idx = 0
            for y in pc:
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                non_missing[6] = False
                # get_variables receives the inverse of the mask built above;
                # its third return value is not needed here.
                (pheno, covariates,
                 _) = y.get_variables(numpy.invert(non_missing))
                for i in range(11):
                    self.assertEqual(sex[i], covariates[0][i])
                    self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])

                idx += 1

            self.assertEqual(2, idx)

            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 29
0
    def testTPedPhenoComplete(self):
        """Load a transposed pedigree with sex as covariate and check sizes and loci."""
        PhenoCovar.sex_as_covariate = True
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        # Twelve values in the first covariate and phenotype rows, one phenotype name.
        self.assertEqual(12, len(pheno_covar.covariate_data[0]))
        self.assertEqual(12, len(pheno_covar.phenotype_data[0]))
        self.assertEqual(1, len(pheno_covar.phenotype_names))
        tped_rows = libgwas.get_lines(self.tped_filename, split=True)
        loci_seen = 0
        self.assertEqual(7, parser.locus_count)
        for row, locus in enumerate(parser):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            loci_seen = row + 1
        self.assertEqual(7, loci_seen)
Ejemplo n.º 30
0
    def testPedCompleteAlternateIteration(self):
        """Useful if you need to iterate over these in a more controlled manner"""
        pheno_covar = PhenoCovar()
        parser = PedigreeParser(self.map_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pheno_covar)
        map_rows = get_lines(self.map_filename, split=True)

        # Fetch the first locus by hand; it is then advanced with next().
        locus = next(iter(parser))
        checked = 0
        try:
            while True:
                fields = map_rows[checked]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(self.genotypes[checked],
                                 list(locus.genotype_data))
                checked += 1
                # The return value is discarded; the same object is re-read
                # on the next pass.
                next(locus)

        except StopIteration:
            pass
        self.assertEqual(7, checked)