Python ADAMContext.loadGenotypes Exemples, bdgenomics.adam.adamContext.ADAMContext.loadGenotypes Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_round_trip(self):
        """Save genotypes from small.vcf as VCF, reload them, and compare counts.

        The reload reads the path that was just written (``tmpPath``) rather
        than the input resource, so the comparison exercises the save path
        instead of comparing the input against itself.
        """
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        tmpPath = self.tmpFile() + ".vcf"
        genotypes.toVariantContexts().saveAsVcf(tmpPath)

        # Read back the saved output, not the original resource.
        savedGenotypes = ac.loadGenotypes(tmpPath)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(genotypes._jvmRdd.jrdd().count(),
                         savedGenotypes._jvmRdd.jrdd().count())

Exemple #2

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : seshendranath/adam

    def test_vcf_round_trip(self):
        """Save genotypes from small.vcf as VCF, reload them, and compare counts.

        The reload reads the path that was just written (``tmpPath``) rather
        than the input resource, so the comparison exercises the save path
        instead of comparing the input against itself.
        """
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.sc)

        genotypes = ac.loadGenotypes(testFile)

        tmpPath = self.tmpFile() + ".vcf"
        genotypes.toVariantContexts().saveAsVcf(tmpPath)

        # Read back the saved output, not the original resource.
        savedGenotypes = ac.loadGenotypes(tmpPath)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(genotypes._jvmRdd.jrdd().count(),
                         savedGenotypes._jvmRdd.jrdd().count())

Exemple #3

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_transform(self):
        """Filter genotypes from random.vcf to contigName '1' and expect 9 rows."""
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        # The lambda is applied through transform(); it filters on contigName.
        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(transformedGenotypes.toDF().count(), 9)

Exemple #4

0

Afficher le fichier

    def test_VariantsPerSampleDistribution(self):
        """Compare the total of the plotted distribution data with the expected total.

        Only the sums are compared, not the individual values.
        """
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        loaded = context.loadGenotypes(vcf_path)
        distribution = VariantsPerSampleDistribution(self.ss, loaded)
        _, counts = distribution.plotDistributions(testMode=True)

        expected_counts = [6, 8, 8, 1, 7, 8]
        assert sum(counts) == sum(expected_counts)

Exemple #5

0

Afficher le fichier

Fichier : adamContext_test.py Projet : yumatchlab/adam

    def test_load_genotypes(self):
        """Load small.vcf as genotypes and expect 18 records.

        The count is checked through both toDF() and the underlying JVM RDD.
        """
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.sc)

        genotypes = context.loadGenotypes(vcf_path)

        dataframe_count = genotypes.toDF().count()
        self.assertEqual(dataframe_count, 18)

        rdd_count = genotypes._jvmRdd.jrdd().count()
        self.assertEqual(rdd_count, 18)

Exemple #6

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : seshendranath/adam

    def test_transform(self):
        """Filter genotypes from random.vcf to contigName '1' and expect 9 rows."""
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.sc)

        genotypes = ac.loadGenotypes(testFile)

        # The lambda is applied through transform(); it filters on contigName.
        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(transformedGenotypes.toDF().count(), 9)

Exemple #7

0

Afficher le fichier

Fichier : adamContext_test.py Projet : bigdatagenomics/adam

    def test_load_genotypes(self):
        """Load small.vcf as genotypes and expect 18 records.

        The count is checked through both toDF() and the underlying JVM RDD.
        """
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        genotypes = context.loadGenotypes(vcf_path)

        dataframe_count = genotypes.toDF().count()
        self.assertEqual(dataframe_count, 18)

        rdd_count = genotypes._jvmRdd.jrdd().count()
        self.assertEqual(rdd_count, 18)

Exemple #8

0

Afficher le fichier

    def test_HetHomRatioDistribution(self):
        """Compare the plotted ratios, sorted and rounded to 2 places, with expected values."""
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        loaded = context.loadGenotypes(vcf_path)
        distribution = HetHomRatioDistribution(self.ss, loaded, sample=1.0)
        _, ratios = distribution.plot(testMode=True)

        expected = sorted([5.0, 0.6, 0.14, 0.17, 1.67])

        # Sort first, then round, so the order matches the sorted expectation.
        rounded = [round(ratio, 2) for ratio in sorted(ratios)]

        assert expected == rounded

Exemple #9

0

Afficher le fichier

    def test_GenotypeCallRatesDistribution(self):
        """Compare the plotted call rates, sorted and rounded to 2 places, with expected values."""
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        loaded = context.loadGenotypes(vcf_path)
        distribution = GenotypeCallRatesDistribution(self.ss, loaded, sample=1.0)
        _, rates = distribution.plot(testMode=True)

        expected = sorted([0.95, 0.88, 0.89, 0.94, 0.93, 0.90])

        # Sort first, then round, so the order matches the sorted expectation.
        rounded = [round(rate, 2) for rate in sorted(rates)]

        assert expected == rounded

Exemple #10

0

Afficher le fichier

Fichier : genotypeDataset_test.py Projet : stjordanis/adam

    def test_vcf_add_filter(self):
        """Add a "BAD" FILTER header line, save as VCF, and look for it in the output."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addFilterHeaderLine("BAD", "Bad variant.")
        with_header.saveAsVcf(output_vcf)

        expected_line = '##FILTER=<ID=BAD,Description="Bad variant.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #11

0

Afficher le fichier

Fichier : genotypeDataset_test.py Projet : stjordanis/adam

    def test_vcf_sort(self):
        """Sort variant contexts from random.vcf, save one VCF, and compare with sorted.vcf."""
        source_vcf = self.resourceFile("random.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        ordered = loaded.toVariantContexts().sort()
        ordered.saveAsVcf(output_vcf, asSingleFile=True)

        # The reference file is looked up in the adam-cli module's resources.
        reference_vcf = self.resourceFile("sorted.vcf", module='adam-cli')
        self.checkFiles(output_vcf, reference_vcf)

Exemple #12

0

Afficher le fichier

    def test_VariantsPerSampleDistributionSampling(self):
        """Check that the total at sample=0.9 is within a tolerance of the expected total."""
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        loaded = context.loadGenotypes(vcf_path)
        distribution = VariantsPerSampleDistribution(self.ss, loaded, sample=0.9)
        _, data = distribution.plotDistributions(testMode=True)

        expected_total = sum([6, 8, 8, 1, 7, 8])

        # Sampled estimates should land near the real counts: the expected
        # total must fall strictly inside (observed - dev, observed + dev).
        dev = 8
        assert sum(data) - dev < expected_total < sum(data) + dev

Exemple #13

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_filter(self):
        """Add a "BAD" FILTER header line, save as VCF, and look for it in the output."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addFilterHeaderLine(
            "BAD", "Bad variant.")
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path, '##FILTER=<ID=BAD,Description="Bad variant.">')

Exemple #14

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_sort_lex(self):
        """Sort random.vcf lexicographically, save one VCF, and compare with sorted.lex.vcf."""
        source_vcf = self.resourceFile("random.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        ordered = loaded.toVariantContexts().sortLexicographically()
        ordered.saveAsVcf(output_vcf, asSingleFile=True)

        # The reference file is looked up in the adam-cli module's resources.
        reference_vcf = self.resourceFile("sorted.lex.vcf", module='adam-cli')
        self.checkFiles(output_vcf, reference_vcf)

Exemple #15

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : seshendranath/adam

    def test_vcf_sort_lex(self):
        """Sort random.vcf lexicographically, save one VCF, and compare with sorted.lex.vcf."""
        input_path = self.resourceFile("random.vcf")
        adam_ctx = ADAMContext(self.sc)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        lex_sorted = gts.toVariantContexts().sortLexicographically()
        lex_sorted.saveAsVcf(saved_path, asSingleFile=True)

        reference_path = self.resourceFile("sorted.lex.vcf")
        self.checkFiles(saved_path, reference_path)

Exemple #16

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : laserson/adam

    def test_vcf_sort(self):
        """Sort the variant-context RDD from random.vcf, save one VCF, and compare with sorted.vcf."""
        input_path = self.resourceFile("random.vcf")
        adam_ctx = ADAMContext(self.sc)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        sorted_contexts = gts.toVariantContextRDD().sort()
        sorted_contexts.saveAsVcf(saved_path, asSingleFile=True)

        reference_path = self.resourceFile("sorted.vcf")
        self.checkFiles(saved_path, reference_path)

Exemple #17

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : nick-manasys/adam

    def test_to_variants(self):
        """Convert genotypes to variants: expect 18 rows by default, 6 with dedupe=True."""
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(vcf_path)

        all_variants = loaded.toVariants()
        self.assertEqual(all_variants.toDF().count(), 18)

        unique_variants = loaded.toVariants(dedupe=True)
        self.assertEqual(unique_variants.toDF().count(), 6)

Exemple #18

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : nick-manasys/adam

    def test_vcf_add_info_scalar(self):
        """Add a scalar INFO header line "SC" of type bool and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addScalarInfoHeaderLine("SC", "Scalar.", bool)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##INFO=<ID=SC,Number=0,Type=Flag,Description="Scalar.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #19

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_to_variants(self):
        """Convert genotypes to variants: expect 18 rows by default, 6 with dedupe=True."""
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        variants = genotypes.toVariants()

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(variants.toDF().count(), 18)

        variants = genotypes.toVariants(dedupe=True)

        self.assertEqual(variants.toDF().count(), 6)

Exemple #20

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_info_all_array(self):
        """Add an all-allele array INFO header line "RA" of type float and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addAllAlleleArrayInfoHeaderLine(
            "RA", "Array with # alleles.", float)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##INFO=<ID=RA,Number=R,Type=Float,Description="Array with # alleles.">')

Exemple #21

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_info_scalar(self):
        """Add a scalar INFO header line "SC" of type bool and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addScalarInfoHeaderLine(
            "SC", "Scalar.", bool)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##INFO=<ID=SC,Number=0,Type=Flag,Description="Scalar.">')

Exemple #22

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_format_alts_array(self):
        """Add an alternate-allele array FORMAT header line "AA" of type chr and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addAlternateAlleleArrayFormatHeaderLine(
            "AA", "Array with # alts.", chr)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##FORMAT=<ID=AA,Number=A,Type=Character,Description="Array with # alts.">')

Exemple #23

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_format_genotype_array(self):
        """Add a genotype array FORMAT header line "GA" of type float and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addGenotypeArrayFormatHeaderLine(
            "GA", "Array with # genotypes.", float)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##FORMAT=<ID=GA,Number=G,Type=Float,Description="Array with # genotypes.">')

Exemple #24

0

Afficher le fichier

Fichier : genotypeDataset_test.py Projet : stjordanis/adam

    def test_vcf_add_format_genotype_array(self):
        """Add a genotype array FORMAT header line "GA" of type float and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addGenotypeArrayFormatHeaderLine(
            "GA", "Array with # genotypes.", float)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##FORMAT=<ID=GA,Number=G,Type=Float,Description="Array with # genotypes.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #25

0

Afficher le fichier

Fichier : genotypeDataset_test.py Projet : stjordanis/adam

    def test_vcf_add_format_alts_array(self):
        """Add an alternate-allele array FORMAT header line "AA" of type chr and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addAlternateAlleleArrayFormatHeaderLine(
            "AA", "Array with # alts.", chr)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##FORMAT=<ID=AA,Number=A,Type=Character,Description="Array with # alts.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #26

0

Afficher le fichier

Fichier : genotypeDataset_test.py Projet : stjordanis/adam

    def test_vcf_add_info_all_array(self):
        """Add an all-allele array INFO header line "RA" of type float and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addAllAlleleArrayInfoHeaderLine(
            "RA", "Array with # alleles.", float)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##INFO=<ID=RA,Number=R,Type=Float,Description="Array with # alleles.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #27

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_format_scalar(self):
        """Add a scalar FORMAT header line "SC" of type str and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addScalarFormatHeaderLine(
            "SC", "Scalar.", str)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##FORMAT=<ID=SC,Number=1,Type=String,Description="Scalar.">')

Exemple #28

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : nick-manasys/adam

    def test_vcf_add_format_scalar(self):
        """Add a scalar FORMAT header line "SC" of type str and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addScalarFormatHeaderLine("SC", "Scalar.", str)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##FORMAT=<ID=SC,Number=1,Type=String,Description="Scalar.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #29

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : karenfeng/adam

    def test_vcf_add_format_array(self):
        """Add a fixed 4-element int array FORMAT header line "FA4" and look for it in the saved VCF."""
        input_path = self.resourceFile("small.vcf")
        adam_ctx = ADAMContext(self.ss)

        gts = adam_ctx.loadGenotypes(input_path)

        saved_path = self.tmpFile() + ".vcf"
        annotated = gts.toVariantContexts().addFixedArrayFormatHeaderLine(
            "FA4", 4, "Fixed array of 4 elements.", int)
        annotated.saveAsVcf(saved_path)

        self.check_for_line_in_file(
            saved_path,
            '##FORMAT=<ID=FA4,Number=4,Type=Integer,Description="Fixed array of 4 elements.">')

Exemple #30

0

Afficher le fichier

Fichier : genotypeRdd_test.py Projet : nick-manasys/adam

    def test_vcf_add_format_array(self):
        """Add a fixed 4-element int array FORMAT header line "FA4" and look for it in the saved VCF."""
        source_vcf = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadGenotypes(source_vcf)

        output_vcf = self.tmpFile() + ".vcf"
        variant_contexts = loaded.toVariantContexts()
        with_header = variant_contexts.addFixedArrayFormatHeaderLine(
            "FA4", 4, "Fixed array of 4 elements.", int)
        with_header.saveAsVcf(output_vcf)

        expected_line = '##FORMAT=<ID=FA4,Number=4,Type=Integer,Description="Fixed array of 4 elements.">'
        self.check_for_line_in_file(output_vcf, expected_line)

Exemple #31

0

Afficher le fichier

    def test_visualize_genotypes(self):
        """Build a GenotypeSummary and check that viewPileup returns something.

        Only the non-None-ness of the result is asserted; its contents are not
        inspected.
        """
        # load file
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("genodata.v3.test.vcf")

        # read genotypes
        genotypes = ac.loadGenotypes(testFile)

        gs = GenotypeSummary(self.ss, ac, genotypes)

        # Region passed to viewPileup.
        contig = "chr22"
        start = 21079600
        end = 21079700

        x = gs.viewPileup(contig, start, end)
        # Identity check: `!= None` would defer to a custom __ne__/__eq__.
        assert x is not None