Ejemplo n.º 1
0
    def testPedigreeNoFamId(self):
        DataParser.has_fid = False
        f = open(self.ped_filename, "w")
        f.write(
            """1 0 0 1 0.1 A A G T A A G G C T G T T T
2 0 0 1 0.4 A C G T G G C G T T G G C T
3 0 0 2 1.0 A A G G A G C C C C G T C T
4 0 0 2 0.5 A A G G A G C G C T G G T T
5 0 0 1 0.9 A C G G A A C G C C G G T T
6 0 0 1 1.0 A A G T A A G G C C G G T T
7 0 0 1 0.1 A A G T A A G G C T G G T T
8 0 0 1 0.4 A C G T G G C G T T G G C T
9 0 0 2 1.0 A A G G A G C C C C G T C T
10 0 0 2 0.5 A A G G A G C G C T G G T T
11 0 0 1 0.9 A C G G A A C G C C G G T T
12 0 0 1 1.0 A A G T A A G G C C G G T T"""
        )

        f.close()
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 2
0
 def testPedigreeIndExclusionsMissingIndThresh(self):
     """Combine explicit individual exclusions with an individual
     missingness tolerance of 0.5 and verify the nine remaining genotype
     columns for every SNP."""
     pc = PhenoCovar()
     DataParser.ind_exclusions = ["11:11", "12:12"]
     DataParser.ind_miss_tol = 0.5  # We should only lose 1
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
     ped_parser.load_mapfile()
     ped_parser.load_genotypes(pc)
     map_rows = get_lines(self.map_filename, split=True)
     expected = [
         [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
         [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
         [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
         [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
         [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
         [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
     ]
     seen = 0
     for seen, snp in enumerate(ped_parser, start=1):
         row = map_rows[seen - 1]
         self.assertEqual(int(row[0]), snp.chr)
         self.assertEqual(int(row[3]), snp.pos)
         self.assertEqual(row[1], snp.rsid)
         # Only the first nine expected entries are compared.
         self.assertEqual(expected[seen - 1][0:9], list(snp.genotype_data))
     self.assertEqual(7, seen)
Ejemplo n.º 3
0
    def testPedigreeIndExclusionsMissingComplete(self):
        """Exclude individuals 11:11 and 12:12 from the missing-data pedigree.

        Each SNP's genotype vector is compared against the first ten entries
        of the matching expected row, and its chromosome, position and rsid
        against the map file.
        """
        DataParser.ind_exclusions = ["11:11", "12:12"]
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [-1, -1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
            [-1, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [-1, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [-1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        index = 0
        for snp in ped_parser:
            # Ten columns are compared after the two exclusions.
            self.assertEqual(genotypes[index][0:10], list(snp.genotype_data))
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 4
0
    def testPedigreeNoPheno(self):
        DataParser.has_pheno = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 1 0 0 1 A A G T A A G G C T G T T T
2 2 0 0 1 A C G T G G C G T T G G C T
3 3 0 0 2 A A G G A G C C C C G T C T
4 4 0 0 2 A A G G A G C G C T G G T T
5 5 0 0 1 A C G G A A C G C C G G T T
6 6 0 0 1 A A G T A A G G C C G G T T
7 7 0 0 1 A A G T A A G G C T G G T T
8 8 0 0 1 A C G T G G C G T T G G C T
9 9 0 0 2 A A G G A G C C C C G T C T
10 10 0 0 2 A A G G A G C G C T G G T T
11 11 0 0 1 A C G G A A C G C C G G T T
12 12 0 0 1 A A G T A A G G C C G G T T""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 5
0
    def testPedigreeWithLiability(self):
        DataParser.has_liability = True
        f = open(self.ped_filename, "w")
        f.write("""1 1 0 0 1 0.1 1 A A G T A A G G C T G T T T
2 2 0 0 1 0.4 1 A C G T G G C G T T G G C T
3 3 0 0 2 1.0 1 A A G G A G C C C C G T C T
4 4 0 0 2 0.5 1 A A G G A G C G C T G G T T
5 5 0 0 1 0.9 1 A C G G A A C G C C G G T T
6 6 0 0 1 1.0 1 A A G T A A G G C C G G T T
7 7 0 0 1 0.1 1 A A G T A A G G C T G G T T
8 8 0 0 1 0.4 1 A C G T G G C G T T G G C T
9 9 0 0 2 1.0 1 A A G G A G C C C C G T C T
10 10 0 0 2 0.5 1 A A G G A G C G C T G G T T
11 11 0 0 1 0.9 1 A C G G A A C G C C G G T T
12 12 0 0 1 1.0 1 A A G T A A G G C C G G T T""")

        f.close()
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 6
0
    def testPedigreeNoFamId(self):
        DataParser.has_fid = False
        f = open(self.ped_filename, "w")
        f.write("""1 0 0 1 0.1 A A G T A A G G C T G T T T
2 0 0 1 0.4 A C G T G G C G T T G G C T
3 0 0 2 1.0 A A G G A G C C C C G T C T
4 0 0 2 0.5 A A G G A G C G C T G G T T
5 0 0 1 0.9 A C G G A A C G C C G G T T
6 0 0 1 1.0 A A G T A A G G C C G G T T
7 0 0 1 0.1 A A G T A A G G C T G G T T
8 0 0 1 0.4 A C G T G G C G T T G G C T
9 0 0 2 1.0 A A G G A G C C C C G T C T
10 0 0 2 0.5 A A G G A G C G C T G G T T
11 0 0 1 0.9 A C G G A A C G C C G G T T
12 0 0 1 1.0 A A G T A A G G C C G G T T""")

        f.close()
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 7
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        with open(self.pheno_file, "w") as f:
            f.write("""FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0""")

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        with open(self.pheno_file) as f2:
            pc.load_phenofile(f2, indices=[3])
            index = 0
            mapdata = get_lines(self.map_filename, split=True)

            with self.assertRaises(InvariantVar):
                for snp in ped_parser:
                    for y in pc:
                        non_missing = numpy.ones(len(snp.genotype_data),
                                                 dtype=bool)
                        (pheno, covariates, nonmissing) = y.get_variables(
                            numpy.invert(non_missing))
Ejemplo n.º 8
0
    def testPedigreeNoFamSexOrParents(self):
        DataParser.has_fid = False
        DataParser.has_sex = False
        DataParser.has_parents = False
        with open(self.ped_filename, "w") as f:
            f.write("""1 0.1 A A G T A A G G C T G T T T
2 0.4 A C G T G G C G T T G G C T
3 1.0 A A G G A G C C C C G T C T
4 0.5 A A G G A G C G C T G G T T
5 0.9 A C G G A A C G C C G G T T
6 1.0 A A G T A A G G C C G G T T
7 0.1 A A G T A A G G C T G G T T
8 0.4 A C G T G G C G T T G G C T
9 1.0 A A G G A G C C C C G T C T
10 0.5 A A G G A G C G C T G G T T
11 0.9 A C G G A A C G C C G G T T
12 1.0 A A G T A A G G C C G G T T""")

        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 9
0
    def testPedigreeIndExclusionsMissingIndThresh(self):
        """Combine individual exclusions with an individual missingness
        tolerance of 0.5.

        Each SNP's genotype vector is compared with the first nine entries
        of the matching expected row; chromosome, position and rsid are
        compared with the map file.
        """
        pc = PhenoCovar()
        DataParser.ind_exclusions = ["11:11", "12:12"]
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]
        genotypes = [
            [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [-1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
            [2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        # A single counter suffices: map rows and expected rows advance together.
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes[index][0:9], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 10
0
    def testPedigreeNoPheno(self):
        DataParser.has_pheno = False
        f = open(self.ped_filename, "w")
        f.write(
            """1 1 0 0 1 A A G T A A G G C T G T T T
2 2 0 0 1 A C G T G G C G T T G G C T
3 3 0 0 2 A A G G A G C C C C G T C T
4 4 0 0 2 A A G G A G C G C T G G T T
5 5 0 0 1 A C G G A A C G C C G G T T
6 6 0 0 1 A A G T A A G G C C G G T T
7 7 0 0 1 A A G T A A G G C T G G T T
8 8 0 0 1 A C G T G G C G T T G G C T
9 9 0 0 2 A A G G A G C C C C G T C T
10 10 0 0 2 A A G G A G C G C T G G T T
11 11 0 0 1 A C G G A A C G C C G G T T
12 12 0 0 1 A A G T A A G G C C G G T T"""
        )

        f.close()
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 11
0
    def testPedigreeIndExclusionsMissingComplete(self):
        """Exclude individuals 11:11 and 12:12 from the missing-data pedigree.

        Each SNP's genotype vector is compared against the first ten entries
        of the matching expected row, and its chromosome, position and rsid
        against the map file.
        """
        DataParser.ind_exclusions = ["11:11", "12:12"]
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [-1, -1, -1, -1, -1, 1, -1, -1, -1, 0, 0, 1],
            [-1, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [-1, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [-1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        index = 0
        for snp in ped_parser:
            # Ten columns are compared after the two exclusions.
            self.assertEqual(genotypes[index][0:10], list(snp.genotype_data))
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 12
0
    def testMissingComplete(self):
        """Iterate the missing-data pedigree with default tolerances and
        expect no ``TooMuchMissing`` errors and seven validated SNPs."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        too_missing = 0
        validated = 0
        for snp_idx, snp in enumerate(ped_parser):
            for y in pc:
                _pheno, _covars, keep = y.get_variables(snp.missing_genotypes)
                try:
                    geno = snp.get_genotype_data(keep)
                    self.assertEqual(self.missing_genotypes[snp_idx],
                                     list(geno.genotypes))
                    self.assertEqual(int(map_rows[snp_idx][0]), snp.chr)
                    self.assertEqual(int(map_rows[snp_idx][3]), snp.pos)
                    self.assertEqual(map_rows[snp_idx][1], snp.rsid)
                    validated += 1
                except TooMuchMissing:
                    too_missing += 1
                except InvalidFrequency:
                    # Deliberately ignored: such SNPs are simply not counted.
                    pass
        self.assertEqual(0, too_missing)
        self.assertEqual(7, validated)
Ejemplo n.º 13
0
    def testMissingSnpThresh(self):
        """With a SNP missingness tolerance of 0.5, expect exactly one
        ``TooMuchMissing`` error across the seven SNPs iterated."""
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        #self.assertEqual([0,1,0,0,0,0,0,0,0,0,0,0], list(ped_parser.individual_mask))
        map_rows = get_lines(self.map_filename, split=True)

        snp_total = 0
        too_missing = 0
        for snp_total, snp in enumerate(ped_parser, start=1):
            row_idx = snp_total - 1
            for y in pc:
                _pheno, _covars, keep = y.get_variables(snp.missing_genotypes)
                try:
                    geno = snp.get_genotype_data(keep)

                    self.assertEqual(int(map_rows[row_idx][0]), snp.chr)
                    self.assertEqual(int(map_rows[row_idx][3]), snp.pos)
                    self.assertEqual(map_rows[row_idx][1], snp.rsid)
                    self.assertEqual(self.missing_genotypes[row_idx],
                                     list(geno.genotypes))
                except TooMuchMissing:
                    too_missing += 1
                except InvalidFrequency:
                    # Deliberately ignored: such SNPs are not counted as missing.
                    pass
        self.assertEqual(1, too_missing)
        self.assertEqual(7, snp_total)
Ejemplo n.º 14
0
 def testMapFileWithRegionAndSnpExclusion(self):
     """Restrict loading to chromosome 2, bp 0-10000, and verify that two
     markers remain while the SNP mask flags the other five."""
     BoundaryCheck.chrom = 2
     DataParser.boundary = BoundaryCheck(bp=[0, 10000])
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
     ped_parser.load_mapfile()

     expected_kept = 2
     self.assertEqual(expected_kept, len(ped_parser.markers))
     self.assertEqual(7, len(ped_parser.snp_mask[:, 0]))
     self.assertEqual(expected_kept, ped_parser.locus_count)
     # The mask has one entry per map row (7); the 5 filtered rows are set to 1.
     self.assertEqual(5, numpy.sum(ped_parser.snp_mask[:, 0]))
     # Rows 4 and 5 are the ones expected to stay unmasked.
     self.assertEqual(0, ped_parser.snp_mask[4, 1])
     self.assertEqual(0, ped_parser.snp_mask[5, 0])
Ejemplo n.º 15
0
 def testMapFileWithRegionAndSnpExclusion(self):
     """Restrict loading to chromosome 2, bp 0-10000, and verify that two
     markers remain while the SNP mask flags the other five."""
     BoundaryCheck.chrom = 2
     DataParser.boundary = BoundaryCheck(bp=[0, 10000])
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
     ped_parser.load_mapfile()

     expected_kept = 2
     self.assertEqual(expected_kept, len(ped_parser.markers))
     self.assertEqual(7, len(ped_parser.snp_mask[:, 0]))
     self.assertEqual(expected_kept, ped_parser.locus_count)
     # The mask has one entry per map row (7); the 5 filtered rows are set to 1.
     self.assertEqual(5, numpy.sum(ped_parser.snp_mask[:, 0]))
     # Rows 4 and 5 are the ones expected to stay unmasked.
     self.assertEqual(0, ped_parser.snp_mask[4, 1])
     self.assertEqual(0, ped_parser.snp_mask[5, 0])
Ejemplo n.º 16
0
 def testMapFileWithSnpBoundary(self):
     """Restrict loading to the SNP range rs0001-rs0003 on chromosome 1 and
     verify that three markers remain while four are masked."""
     BoundaryCheck.chrom = 1
     DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
     ped_parser.load_mapfile()

     expected_kept = 3
     self.assertEqual(expected_kept, len(ped_parser.markers))
     self.assertEqual(7, len(ped_parser.snp_mask))
     self.assertEqual(expected_kept, ped_parser.locus_count)
     # The mask has one entry per map row (7); the 4 filtered rows are set to 1.
     self.assertEqual(4, numpy.sum(ped_parser.snp_mask[:, 0]))
     # The first three rows fall inside the range and must stay unmasked.
     self.assertEqual(0, ped_parser.snp_mask[0, 0])
     self.assertEqual(0, ped_parser.snp_mask[1, 1])
     self.assertEqual(0, ped_parser.snp_mask[2, 1])
Ejemplo n.º 17
0
 def testMapFileWithSnpBoundary(self):
     """Restrict loading to the SNP range rs0001-rs0003 on chromosome 1 and
     verify that three markers remain while four are masked."""
     BoundaryCheck.chrom = 1
     DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
     ped_parser.load_mapfile()

     expected_kept = 3
     self.assertEqual(expected_kept, len(ped_parser.markers))
     self.assertEqual(7, len(ped_parser.snp_mask))
     self.assertEqual(expected_kept, ped_parser.locus_count)
     # The mask has one entry per map row (7); the 4 filtered rows are set to 1.
     self.assertEqual(4, numpy.sum(ped_parser.snp_mask[:, 0]))
     # The first three rows fall inside the range and must stay unmasked.
     self.assertEqual(0, ped_parser.snp_mask[0, 0])
     self.assertEqual(0, ped_parser.snp_mask[1, 1])
     self.assertEqual(0, ped_parser.snp_mask[2, 1])
Ejemplo n.º 18
0
 def testMap3File(self):
     """Load the map file with ``map3=True`` and verify that all seven
     markers, their chromosomes, positions and selected rsids are read and
     that nothing is masked."""
     ped_parser = PedigreeParser(self.map3_filename, self.ped_filename)
     ped_parser.load_mapfile(map3=True)

     marker_total = 7
     self.assertEqual(marker_total, len(ped_parser.markers))
     self.assertEqual(marker_total, len(ped_parser.snp_mask))
     self.assertEqual(marker_total, ped_parser.locus_count)

     expected_chroms = [1, 1, 1, 1, 2, 2, 2]
     self.assertEqual(expected_chroms, list(ped_parser.markers[:, 0]))
     self.assertEqual("rs0001", ped_parser.rsids[0])
     self.assertEqual("rs0005", ped_parser.rsids[4])
     self.assertEqual("rs0007", ped_parser.rsids[6])

     expected_positions = [500, 10000, 25000, 45000, 750, 10000, 25000]
     self.assertEqual(expected_positions, list(ped_parser.markers[:, 1]))
     # No filter is active, so no mask entry may be set.
     self.assertEqual(0, numpy.sum(ped_parser.snp_mask))
Ejemplo n.º 19
0
 def testMap3File(self):
     """Load the map file with ``map3=True`` and verify that all seven
     markers, their chromosomes, positions and selected rsids are read and
     that nothing is masked."""
     ped_parser = PedigreeParser(self.map3_filename, self.ped_filename)
     ped_parser.load_mapfile(map3=True)

     marker_total = 7
     self.assertEqual(marker_total, len(ped_parser.markers))
     self.assertEqual(marker_total, len(ped_parser.snp_mask))
     self.assertEqual(marker_total, ped_parser.locus_count)

     expected_chroms = [1, 1, 1, 1, 2, 2, 2]
     self.assertEqual(expected_chroms, list(ped_parser.markers[:, 0]))
     self.assertEqual("rs0001", ped_parser.rsids[0])
     self.assertEqual("rs0005", ped_parser.rsids[4])
     self.assertEqual("rs0007", ped_parser.rsids[6])

     expected_positions = [500, 10000, 25000, 45000, 750, 10000, 25000]
     self.assertEqual(expected_positions, list(ped_parser.markers[:, 1]))
     # No filter is active, so no mask entry may be set.
     self.assertEqual(0, numpy.sum(ped_parser.snp_mask))
Ejemplo n.º 20
0
    def testPedNegativePositions(self):
        """Load genotypes using ``self.map_miss_filename`` and verify the
        yielded SNPs against rows 2..6 of the regular map file."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        # Comparison starts at row 2; presumably the first two markers in the
        # "miss" map are dropped by the parser -- confirm against the fixture.
        row_idx = 2
        for snp in ped_parser:
            row = map_rows[row_idx]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(self.genotypes[row_idx], list(snp.genotype_data))
            row_idx += 1
        self.assertEqual(7, row_idx)
Ejemplo n.º 21
0
    def testWithFewIndividuals(self):
        """Load the mini pedigree (three genotype values per SNP) and verify
        every SNP against the map file and the expected genotype rows."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.miniped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]
        genotypes = [[0, 1, 0], [1, 1, 0], [0, 2, 1], [0, 1, 1], [1, 0, 1], [1, 0, 1], [0, 1, 1]]
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 22
0
    def testPedNegativePositions(self):
        """Load genotypes using ``self.map_miss_filename`` and verify the
        yielded SNPs against rows 2..6 of the regular map file."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # Comparison starts at row 2; presumably the first two markers in the
        # "miss" map are dropped by the parser -- confirm against the fixture.
        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 23
0
    def testMissingComplete(self):
        """Load the missing-data pedigree with default tolerances and verify
        each SNP against ``self.missing_genotypes`` and the map file."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        index = 0
        for snp in ped_parser:
            self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 24
0
    def testPedNegativePositionsLocalChrom(self):
        """Load genotypes using ``self.map_miss_filename`` with
        ``BoundaryCheck.chrom`` set to 1 and verify the yielded SNPs against
        rows 2..3 of the regular map file."""
        BoundaryCheck.chrom = 1
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # Comparison starts at row 2; presumably the first two markers in the
        # "miss" map are dropped by the parser -- confirm against the fixture.
        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        # Two SNPs are expected, so the counter stops at 4.
        self.assertEqual(4, index)
Ejemplo n.º 25
0
    def testWithFewIndividuals(self):
        """Load the mini pedigree (three genotype values per SNP) and verify
        every SNP against the map file and the expected genotype rows."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.miniped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        expected = [
            [0, 1, 0],
            [1, 1, 0],
            [0, 2, 1],
            [0, 1, 1],
            [1, 0, 1],
            [1, 0, 1],
            [0, 1, 1],
        ]
        snp_count = 0
        for snp_count, snp in enumerate(ped_parser, start=1):
            row = map_rows[snp_count - 1]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(expected[snp_count - 1], list(snp.genotype_data))
        self.assertEqual(7, snp_count)
Ejemplo n.º 26
0
    def testPedigreeIndExclusionsComplete(self):
        """Exclude individuals 11:11 and 12:12 and verify that each SNP
        carries the first ten entries of the matching ``self.genotypes`` row."""
        DataParser.ind_exclusions = ["11:11", "12:12"]
        pc = PhenoCovar()

        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        snp_count = 0
        for snp_count, snp in enumerate(ped_parser, start=1):
            row = map_rows[snp_count - 1]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            kept = self.genotypes[snp_count - 1][0:10]
            self.assertEqual(kept, list(snp.genotype_data))
        self.assertEqual(7, snp_count)
Ejemplo n.º 27
0
    def testPedBoundary(self):
        """Restrict the parser to chromosome 2 via ``BoundaryCheck`` and verify
        the yielded SNPs against map rows 4..6."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        # The comparison begins at map row 4 and must end after row 6.
        row_idx = 4
        for snp in ped_parser:
            row = map_rows[row_idx]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(self.genotypes[row_idx], list(snp.genotype_data))
            row_idx += 1
        self.assertEqual(7, row_idx)
Ejemplo n.º 28
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Use ``self.map_miss_filename`` with chromosome 1 selected and
        rs0004 excluded; exactly one SNP (map row 2) is expected."""
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        map_rows = get_lines(self.map_filename, split=True)

        # Comparison starts at map row 2 and the counter must stop at 3.
        row_idx = 2
        for snp in ped_parser:
            row = map_rows[row_idx]
            self.assertEqual(int(row[0]), snp.chr)
            self.assertEqual(int(row[3]), snp.pos)
            self.assertEqual(row[1], snp.rsid)
            self.assertEqual(self.genotypes[row_idx], list(snp.genotype_data))
            row_idx += 1
        self.assertEqual(3, row_idx)
Ejemplo n.º 29
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Use ``self.map_miss_filename`` with chromosome 1 selected and
        rs0004 excluded; exactly one SNP (map row 2) is expected."""
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # Comparison starts at map row 2 and the counter must stop at 3.
        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(3, index)
Ejemplo n.º 30
0
    def testPedBoundary(self):
        """Restrict the parser to chromosome 2 via ``BoundaryCheck`` and verify
        the yielded SNPs against map rows 4..6."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        # (The rows come from the map file even though the local is named
        # "pedigree".)
        with open(self.map_filename) as map_file:
            pedigree = [line.split() for line in map_file]

        # The comparison begins at map row 4 and must end after row 6.
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 31
0
    def testMissingIndThresh(self):
        """With an individual missingness tolerance of 0.5, verify that each
        SNP's genotypes equal the expected row minus its first entry."""
        pc = PhenoCovar()
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # A single counter suffices: map rows and expected rows advance together.
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            # [1:] skips the first individual's column in the expected row.
            self.assertEqual(self.missing_genotypes[index][1:], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 32
0
    def testMissingSnpThresh(self):
        """With a SNP missingness tolerance of 0.5, verify the yielded SNPs
        against the map file while skipping map row 1, which is expected to
        be dropped."""
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        # self.assertEqual([0,1,0,0,0,0,0,0,0,0,0,0], list(ped_parser.individual_mask))
        # Context manager closes the map file instead of leaking the handle.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        index = 0
        for snp in ped_parser:
            if index == 1:  # This gets dropped due to missingness
                index += 1
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 33
0
    def testPedMultiPheno(self):
        """Load two phenotypes (``indices=[2, 3]``) with sex as a covariate.

        For every SNP the map data and genotypes are verified; then, for each
        phenotype, the variables are requested with individual 6 flagged as
        missing and the first eleven sex/phenotype values are compared with
        the expected lists.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        with open(self.pheno_file) as f:
            pc.load_phenofile(f, indices=[2, 3])
        mapdata = get_lines(self.map_filename, split=True)

        # Expected values for the eleven individuals left after dropping index 6.
        sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
        dual_pheno = [[1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
                      [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2]]

        self.assertEqual(2, len(pc.phenotype_data))
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            idx = 0
            for y in pc:
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                non_missing[6] = False
                # get_variables receives the inverted mask (True == missing).
                (pheno, covariates,
                 nm_indata) = y.get_variables(numpy.invert(non_missing))
                for i in range(11):
                    self.assertEqual(sex[i], covariates[0][i])
                    self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])

                idx += 1

            # Both phenotypes must have been visited for this SNP.
            self.assertEqual(2, idx)

            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 34
0
    def testMissingIndThresh(self):
        """Check SNP iteration with an individual-missingness tolerance set.

        With ``DataParser.ind_miss_tol`` at 0.5, each SNP's genotypes are
        expected to equal the matching row of ``self.missing_genotypes``
        without its first entry, and 7 SNPs are expected in total.
        """
        pc = PhenoCovar()
        DataParser.ind_miss_tol = 0.5       # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Read the map file through a context manager so the handle is closed.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # One counter indexes both the map rows and the expected genotypes.
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            # [1:] skips the first individual in the expected row.
            self.assertEqual(self.missing_genotypes[index][1:], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 35
0
    def testMissingSnpThresh(self):
        """Check SNP iteration with a SNP-missingness tolerance set.

        With ``DataParser.snp_miss_tol`` at 0.5 the test expects the entry at
        map index 1 to be absent from iteration, so that index is skipped when
        comparing against the map file and ``self.missing_genotypes``.
        """
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5       # We should only lose 1
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename_missing)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        # Disabled assertion kept from the original test:
        #self.assertEqual([0,1,0,0,0,0,0,0,0,0,0,0], list(ped_parser.individual_mask))

        # Read the map file through a context manager so the handle is closed.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        index = 0
        for snp in ped_parser:
            if index == 1:      # This gets dropped due to missingness
                index += 1
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.missing_genotypes[index], list(snp.genotype_data))
            index += 1
        # The final index counts the skipped entry as well.
        self.assertEqual(7, index)
Ejemplo n.º 36
0
    def testPedMultiPheno(self):
        """Check SNP iteration with two phenotypes and sex as a covariate.

        Loads the pedigree, then the phenotype columns selected by
        ``indices=[2, 3]`` from ``self.pheno_file``.  For every SNP the map
        fields and genotypes are verified, and for every phenotype yielded by
        the ``PhenoCovar`` object the sex covariate and phenotype values are
        compared against the expected 11-entry lists.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Both input files are opened with context managers so the handles
        # are closed instead of being left to the garbage collector.
        with open(self.pheno_file) as pheno_file:
            pc.load_phenofile(pheno_file, indices=[2, 3])
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        # Expected values for the 11 entries compared below.
        sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
        dual_pheno = [
            [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
            [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
        ]

        self.assertEqual(2, len(pc.phenotype_data))
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            idx = 0
            for y in pc:
                # Position 6 is flagged; presumably get_variables() drops the
                # flagged individual -- confirm against PhenoCovar.
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                non_missing[6] = False
                pheno, covariates, _ = y.get_variables(numpy.invert(non_missing))
                for i, expected_sex in enumerate(sex):
                    self.assertEqual(expected_sex, covariates[0][i])
                    self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])

                idx += 1

            # Both loaded phenotypes must have been visited.
            self.assertEqual(2, idx)

            index += 1
        self.assertEqual(7, index)
Ejemplo n.º 37
0
    def testPedCompleteAlternateIteration(self):
        """Useful if you need to iterate over these in a more controlled manner

        Instead of a ``for`` loop, the first item is fetched explicitly and
        then advanced by calling ``next()`` on that same object until
        ``StopIteration`` is raised.
        """
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)

        # Read the map file through a context manager so the handle is closed.
        with open(self.map_filename) as map_file:
            mapdata = [line.strip().split() for line in map_file]

        index = 0
        # The builtin next() works on both Python 2 (.next) and Python 3
        # (__next__) iterators, unlike calling the .next() method directly.
        snp = next(iter(ped_parser))
        try:
            while True:
                self.assertEqual(int(mapdata[index][0]), snp.chr)
                self.assertEqual(int(mapdata[index][3]), snp.pos)
                self.assertEqual(mapdata[index][1], snp.rsid)
                self.assertEqual(self.genotypes[index], list(snp.genotype_data))
                index += 1
                # The return value is discarded; the same object is re-read
                # on the next pass through the loop.
                next(snp)

        except StopIteration:
            pass
        self.assertEqual(7, index)
Ejemplo n.º 38
0
    def testPedCompleteAlternateIteration(self):
        """Step through the SNPs by hand with next() instead of a for loop.

        The first item is pulled from the parser's iterator and then advanced
        with ``next()`` on that same object until ``StopIteration`` ends the
        walk; 7 SNPs are expected.
        """
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        mapdata = get_lines(self.map_filename, split=True)

        seen = 0
        snp = next(ped_parser.__iter__())
        try:
            while True:
                map_row = mapdata[seen]
                self.assertEqual(int(map_row[0]), snp.chr)
                self.assertEqual(int(map_row[3]), snp.pos)
                self.assertEqual(map_row[1], snp.rsid)
                expected_genotypes = self.genotypes[seen]
                self.assertEqual(expected_genotypes, list(snp.genotype_data))
                seen += 1
                # Result is discarded; the same object is inspected again.
                next(snp)
        except StopIteration:
            # Iterator exhausted: every SNP has been checked.
            pass
        self.assertEqual(7, seen)
Ejemplo n.º 39
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        f = open(self.pheno_file, "w")
        f.write(
            """FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0"""
        )
        f.close()

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        pc.load_phenofile(open(self.pheno_file), indices=[3])
        index = 0

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        with self.assertRaises(InvariantVar):
            for snp in ped_parser:
                for y in pc:
                    non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                    (pheno, covariates, nonmissing) = y.get_variables(numpy.invert(non_missing))