コード例 #1
0
ファイル: distribution_test.py プロジェクト: rongshanyu/mango
    def test_cumulative_count_distribution(self):
        """Check the un-normalized coverage distribution of small.sam.

        The distribution is requested twice, first cumulative and then
        non-cumulative. In both cases the sample that gets popped from the
        result is expected to hold exactly one entry whose second element
        is 1500.
        """
        context = ADAMContext(self.ss)
        sam_path = self.resourceFile("small.sam")

        # Alignments -> coverage -> distribution helper under test.
        alignments = context.loadAlignments(sam_path)
        distribution = CoverageDistribution(self.ss, alignments.toCoverage())

        for cumulative in (True, False):
            _, per_sample = distribution.plotDistributions(testMode=True,
                                                           cumulative=cumulative,
                                                           normalize=False)

            # Inspect whichever sample popitem() hands back.
            entries = list(per_sample.popitem()[1])
            assert len(entries) == 1
            assert entries.pop()[1] == 1500
コード例 #2
0
ファイル: coverageRdd_test.py プロジェクト: karenfeng/adam
    def test_collapse(self):
        """Collapsing coverage from sorted.sam must keep the row count.

        The DataFrame row count of the collapsed coverage is compared with
        the row count of the coverage it was derived from.
        """
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.collapse()
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(collapsed.toDF().count(), coverage.toDF().count())
コード例 #3
0
ファイル: coverageRdd_test.py プロジェクト: karenfeng/adam
    def test_aggregatedCoverage(self):
        """Aggregating small.sam coverage with argument 10 yields 166 rows."""
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.aggregatedCoverage(10)
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(collapsed.toDF().count(), 166)
コード例 #4
0
ファイル: coverageRdd_test.py プロジェクト: karenfeng/adam
    def test_flatten(self):
        """Flattening small.sam coverage yields 1500 DataFrame rows."""
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        flattened = coverage.flatten()
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(flattened.toDF().count(), 1500)
コード例 #5
0
    def test_collapse(self):
        """Collapsing coverage from sorted.sam must keep the row count.

        The DataFrame row count of the collapsed coverage is compared with
        the row count of the coverage it was derived from.
        """
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.collapse()
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(collapsed.toDF().count(), coverage.toDF().count())
コード例 #6
0
    def test_aggregatedCoverage(self):
        """Aggregating small.sam coverage with argument 10 yields 166 rows."""
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.aggregatedCoverage(10)
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(collapsed.toDF().count(), 166)
コード例 #7
0
ファイル: adamContext_test.py プロジェクト: yumatchlab/adam
    def test_load_coverage(self):
        """Loading sample_coverage.bed as coverage yields 3 DataFrame rows."""
        bed_path = self.resourceFile("sample_coverage.bed")
        context = ADAMContext(self.sc)

        row_count = context.loadCoverage(bed_path).toDF().count()

        self.assertEqual(row_count, 3)
コード例 #8
0
    def test_flatten(self):
        """Flattening small.sam coverage yields 1500 DataFrame rows."""
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        flattened = coverage.flatten()
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(flattened.toDF().count(), 1500)
コード例 #9
0
ファイル: adamContext_test.py プロジェクト: wwjiang007/adam
    def test_load_slices(self):
        """Loading the HLA FASTA as slices (second argument 10000) gives one record.

        The count is checked through the DataFrame view and through the
        underlying JVM dataset.
        """
        fasta_path = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        context = ADAMContext(self.ss)

        loaded = context.loadSlices(fasta_path, 10000)
        expected_count = 1

        self.assertEqual(loaded.toDF().count(), expected_count)
        self.assertEqual(loaded._jvmDataset.jrdd().count(), expected_count)
コード例 #10
0
    def test_load_coverage(self):
        """Loading sample_coverage.bed as coverage yields 3 DataFrame rows."""
        coverage_path = self.resourceFile("sample_coverage.bed")
        adam_context = ADAMContext(self.ss)

        frame = adam_context.loadCoverage(coverage_path).toDF()

        self.assertEqual(frame.count(), 3)
コード例 #11
0
    def test_load_interval_list(self):
        """Loading the SeqCap interval_list as features gives 369 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        interval_path = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        context = ADAMContext(self.ss)

        features = context.loadFeatures(interval_path)
        expected_count = 369

        self.assertEqual(features.toDF().count(), expected_count)
        self.assertEqual(features._jvmRdd.jrdd().count(), expected_count)
コード例 #12
0
    def test_load_bed(self):
        """Loading the truncated gencode BED as features gives 10 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        bed_path = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        context = ADAMContext(self.ss)

        features = context.loadFeatures(bed_path)
        expected_count = 10

        self.assertEqual(features.toDF().count(), expected_count)
        self.assertEqual(features._jvmRdd.jrdd().count(), expected_count)
コード例 #13
0
    def test_load_gtf(self):
        """Loading the truncated GRCh37 GTF as features gives 15 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        gtf_path = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        context = ADAMContext(self.ss)

        features = context.loadFeatures(gtf_path)
        expected_count = 15

        self.assertEqual(features.toDF().count(), expected_count)
        self.assertEqual(features._jvmRdd.jrdd().count(), expected_count)
コード例 #14
0
    def test_load_alignments(self):
        """Loading small.sam as alignments gives 20 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        sam_path = self.resourceFile("small.sam")
        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(sam_path)
        expected_count = 20

        self.assertEqual(alignments.toDF().count(), expected_count)
        self.assertEqual(alignments._jvmRdd.jrdd().count(), expected_count)
コード例 #15
0
ファイル: adamContext_test.py プロジェクト: wwjiang007/adam
    def test_load_dna_sequences(self):
        """Loading the HLA FASTA as DNA sequences gives one record.

        The count is checked through the DataFrame view and through the
        underlying JVM dataset.
        """
        fasta_path = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        context = ADAMContext(self.ss)

        loaded = context.loadDnaSequences(fasta_path)
        expected_count = 1

        self.assertEqual(loaded.toDF().count(), expected_count)
        self.assertEqual(loaded._jvmDataset.jrdd().count(), expected_count)
コード例 #16
0
    def test_VariantsPerSampleDistribution(self):
        """Compare the total of the plotted per-sample data with the expected total.

        Only the sums are compared, so the order of the returned values
        does not matter.
        """
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcf_path)
        distribution = VariantsPerSampleDistribution(self.ss, genotypes)
        _, counts = distribution.plotDistributions(testMode=True)

        expected_counts = [6, 8, 8, 1, 7, 8]
        assert sum(counts) == sum(expected_counts)
コード例 #17
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_interval_list(self):
        """Loading the SeqCap interval_list as features gives 369 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        adam_context = ADAMContext(self.ss)

        loaded_features = adam_context.loadFeatures(resource)

        df_rows = loaded_features.toDF().count()
        self.assertEqual(df_rows, 369)
        rdd_rows = loaded_features._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 369)
コード例 #18
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_bed(self):
        """Loading the truncated gencode BED as features gives 10 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        adam_context = ADAMContext(self.ss)

        loaded_features = adam_context.loadFeatures(resource)

        df_rows = loaded_features.toDF().count()
        self.assertEqual(df_rows, 10)
        rdd_rows = loaded_features._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 10)
コード例 #19
0
    def test_transform(self):
        """Filtering random.vcf genotypes to contigName '1' leaves 9 rows.

        The filter is applied through ``transform``, which receives the
        callable and hands it the object to filter.
        """
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.sc)

        genotypes = ac.loadGenotypes(testFile)

        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(transformedGenotypes.toDF().count(), 9)
コード例 #20
0
    def test_count_kmers(self):
        """Counting k-mers with argument 6 on small.sam gives 1040 entries."""
        sam_path = self.resourceFile("small.sam")
        context = ADAMContext(self.ss)

        kmer_counts = context.loadAlignments(sam_path).countKmers(6)

        self.assertEqual(kmer_counts.count(), 1040)
コード例 #21
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_alignments(self):
        """Loading small.sam as alignments gives 20 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("small.sam")
        adam_context = ADAMContext(self.ss)

        loaded_reads = adam_context.loadAlignments(resource)

        df_rows = loaded_reads.toDF().count()
        self.assertEqual(df_rows, 20)
        rdd_rows = loaded_reads._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 20)
コード例 #22
0
    def test_load_genotypes(self):
        """Loading small.vcf as genotypes gives 18 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        genotypes = context.loadGenotypes(vcf_path)
        expected_count = 18

        self.assertEqual(genotypes.toDF().count(), expected_count)
        self.assertEqual(genotypes._jvmRdd.jrdd().count(), expected_count)
コード例 #23
0
    def test_to_fragments(self):
        """Converting unsorted.sam alignments to fragments gives 5 rows."""
        sam_path = self.resourceFile("unsorted.sam")
        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(sam_path)
        fragment_rows = alignments.toFragments().toDF().count()

        self.assertEqual(fragment_rows, 5)
コード例 #24
0
ファイル: adamContext_test.py プロジェクト: yumatchlab/adam
    def test_load_contig_fragments(self):
        """Loading the HLA FASTA as contig fragments gives one record.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        fasta_path = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        context = ADAMContext(self.sc)

        fragments = context.loadContigFragments(fasta_path)
        expected_count = 1

        self.assertEqual(fragments.toDF().count(), expected_count)
        self.assertEqual(fragments._jvmRdd.jrdd().count(), expected_count)
コード例 #25
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_gtf(self):
        """Loading the truncated GRCh37 GTF as features gives 15 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        adam_context = ADAMContext(self.ss)

        loaded_features = adam_context.loadFeatures(resource)

        df_rows = loaded_features.toDF().count()
        self.assertEqual(df_rows, 15)
        rdd_rows = loaded_features._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 15)
コード例 #26
0
ファイル: adamContext_test.py プロジェクト: yumatchlab/adam
    def test_load_variants(self):
        """Loading small.vcf as variants gives 6 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.sc)

        variants = context.loadVariants(vcf_path)
        expected_count = 6

        self.assertEqual(variants.toDF().count(), expected_count)
        self.assertEqual(variants._jvmRdd.jrdd().count(), expected_count)
コード例 #27
0
ファイル: genotypeRdd_test.py プロジェクト: karenfeng/adam
    def test_transform(self):
        """Filtering random.vcf genotypes to contigName '1' leaves 9 rows.

        The filter is applied through ``transform``, which receives the
        callable and hands it the object to filter.
        """
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(transformedGenotypes.toDF().count(), 9)
コード例 #28
0
    def test_realignIndels_reads(self):
        """Realigning indels on small.1.sam alignments leaves 20 rows."""
        sam_path = self.resourceFile("small.1.sam")

        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(sam_path)
        realigned_rows = alignments.realignIndels().toDF().count()

        self.assertEqual(realigned_rows, 20)
コード例 #29
0
    def test_transform(self):
        """Filtering unsorted.sam alignments to referenceName "1" leaves 1 row."""
        sam_path = self.resourceFile("unsorted.sam")
        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(sam_path)

        def keep_reference_one(frame):
            # Applied by transform() to the object it passes in.
            return frame.filter(frame.referenceName == "1")

        filtered = alignments.transform(keep_reference_one)

        self.assertEqual(filtered.toDF().count(), 1)
コード例 #30
0
    def test_transform(self):
        """Filtering gencode BED features to start < 12613 leaves 6 rows."""
        bed_path = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        context = ADAMContext(self.ss)

        loaded = context.loadFeatures(bed_path)

        def starts_before_cutoff(frame):
            # Applied by transform() to the object it passes in.
            return frame.filter(frame.start < 12613)

        filtered = loaded.transform(starts_before_cutoff)

        self.assertEqual(filtered.toDF().count(), 6)
コード例 #31
0
    def test_GenotypeCallRatesDistribution(self):
        """Compare plotted call rates, rounded to 2 places, with expected values.

        Both sides are sorted before comparison, so the order in which the
        values are returned does not matter.
        """
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcf_path)
        distribution = GenotypeCallRatesDistribution(self.ss, genotypes, sample=1.0)
        _, rates = distribution.plot(testMode=True)

        expected_rates = sorted([0.95, 0.88, 0.89, 0.94, 0.93, 0.90])
        rounded_rates = [round(rate, 2) for rate in sorted(rates)]

        assert expected_rates == rounded_rates
コード例 #32
0
    def test_load_indexed_bam(self):
        """Querying the indexed sorted.bam with two regions returns 2 rows."""
        bam_path = self.resourceFile("indexed_bams/sorted.bam")
        context = ADAMContext(self.ss)

        # Regions are built inline and passed straight to the loader.
        alignments = context.loadIndexedBam(bam_path,
                                            [ReferenceRegion("chr2", 100, 101),
                                             ReferenceRegion("3", 10, 17)])

        self.assertEqual(alignments.toDF().count(), 2)
コード例 #33
0
    def test_load_narrowPeak(self):
        """Loading the truncated narrowPeak file as features gives 10 records.

        The count is checked through the DataFrame view and through the
        underlying JVM RDD.
        """
        peak_path = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        context = ADAMContext(self.ss)

        features = context.loadFeatures(peak_path)
        expected_count = 10

        self.assertEqual(features.toDF().count(), expected_count)
        self.assertEqual(features._jvmRdd.jrdd().count(), expected_count)
コード例 #34
0
    def test_transform(self):
        """Filtering small.vcf variants to start < 19190 leaves 3 rows."""
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        loaded = context.loadVariants(vcf_path)

        def starts_before_cutoff(frame):
            # Applied by transform() to the object it passes in.
            return frame.filter(frame.start < 19190)

        filtered = loaded.transform(starts_before_cutoff)

        self.assertEqual(filtered.toDF().count(), 3)
コード例 #35
0
    def test_HetHomRatioDistribution(self):
        """Compare plotted het/hom ratios, rounded to 2 places, with expected values.

        Both sides are sorted before comparison, so the order in which the
        values are returned does not matter.
        """
        context = ADAMContext(self.ss)
        vcf_path = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcf_path)
        distribution = HetHomRatioDistribution(self.ss, genotypes, sample=1.0)
        _, ratios = distribution.plot(testMode=True)

        expected_ratios = sorted([5.0, 0.6, 0.14, 0.17, 1.67])
        rounded_ratios = [round(ratio, 2) for ratio in sorted(ratios)]

        assert expected_ratios == rounded_ratios
コード例 #36
0
    def test_load_variants(self):
        """Loading small.vcf as variants gives 6 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("small.vcf")
        adam_context = ADAMContext(self.ss)

        loaded_variants = adam_context.loadVariants(resource)

        df_rows = loaded_variants.toDF().count()
        self.assertEqual(df_rows, 6)
        rdd_rows = loaded_variants._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 6)
コード例 #37
0
    def test_toFeatures(self):
        """Converting sorted.sam coverage to features keeps type and row count.

        The result must be a FeatureRDD and its DataFrame must have as many
        rows as the DataFrame of the coverage it was built from.
        """
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        features = coverage.toFeatures()

        # Use the unittest assertion rather than a bare assert: it still runs
        # under "python -O" and reports the actual type on failure.
        self.assertIsInstance(features, FeatureRDD)
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(features.toDF().count(), coverage.toDF().count())
コード例 #38
0
    def test_load_contig_fragments(self):
        """Loading the HLA FASTA as contig fragments gives one record.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        adam_context = ADAMContext(self.ss)

        loaded_fragments = adam_context.loadContigFragments(resource)

        df_rows = loaded_fragments.toDF().count()
        self.assertEqual(df_rows, 1)
        rdd_rows = loaded_fragments._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 1)
コード例 #39
0
ファイル: coverageRdd_test.py プロジェクト: karenfeng/adam
    def test_toFeatures(self):
        """Converting sorted.sam coverage to features keeps type and row count.

        The result must be a FeatureRDD and its DataFrame must have as many
        rows as the DataFrame of the coverage it was built from.
        """
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        features = coverage.toFeatures()

        # Use the unittest assertion rather than a bare assert: it still runs
        # under "python -O" and reports the actual type on failure.
        self.assertIsInstance(features, FeatureRDD)
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(features.toDF().count(), coverage.toDF().count())
コード例 #40
0
    def test_save(self):
        """Saving sorted.sam coverage must leave a non-empty output directory.

        Coverage is written with asSingleFile and disableFastConcat both
        enabled, then the output path is listed.
        """
        sam_path = self.resourceFile("sorted.sam")
        context = ADAMContext(self.ss)

        sam_coverage = context.loadAlignments(sam_path).toCoverage()
        output_path = self.tmpFile() + ".coverage.adam"
        sam_coverage.save(output_path,
                          asSingleFile=True,
                          disableFastConcat=True)

        written_entries = os.listdir(output_path)
        assert written_entries != []
コード例 #41
0
    def test_save_unordered_sam(self):
        """Round-trip unordered.sam through saveAsSam and compare the files.

        The alignments are written as a single file to a temporary ".sam"
        path, which is then checked against the input with checkFiles.
        """
        source_path = self.resourceFile("unordered.sam")
        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(source_path)
        output_path = self.tmpFile() + ".sam"
        alignments.saveAsSam(output_path,
                             asSingleFile=True)

        self.checkFiles(source_path, output_path)
コード例 #42
0
    def test_transform(self):
        """Filtering gencode BED features to start < 12613 leaves 6 rows.

        The filter is applied through ``transform``, which receives the
        callable and hands it the object to filter.
        """
        featurePath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(featurePath)

        transformedFeatures = features.transform(lambda x: x.filter(x.start < 12613))

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(transformedFeatures.toDF().count(), 6)
コード例 #43
0
    def test_transform(self):
        """Filtering small.vcf variants to start < 19190 leaves 3 rows.

        The filter is applied through ``transform``, which receives the
        callable and hands it the object to filter.
        """
        variantPath = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        variants = ac.loadVariants(variantPath)

        transformedVariants = variants.transform(lambda x: x.filter(x.start < 19190))

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(transformedVariants.toDF().count(), 3)
コード例 #44
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_indexed_bam(self):
        """Querying the indexed sorted.bam with two regions returns 2 rows."""
        bam_path = self.resourceFile("indexed_bams/sorted.bam")
        context = ADAMContext(self.ss)

        # Name the query regions before handing them to the loader.
        first_region = ReferenceRegion("chr2", 100, 101)
        second_region = ReferenceRegion("3", 10, 17)
        alignments = context.loadIndexedBam(bam_path, [first_region, second_region])

        self.assertEqual(alignments.toDF().count(), 2)
コード例 #45
0
ファイル: adamContext_test.py プロジェクト: yyz940922/adam
    def test_load_narrowPeak(self):
        """Loading the truncated narrowPeak file as features gives 10 records.

        Both the DataFrame view and the underlying JVM RDD are counted.
        """
        resource = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        adam_context = ADAMContext(self.ss)

        loaded_features = adam_context.loadFeatures(resource)

        df_rows = loaded_features.toDF().count()
        self.assertEqual(df_rows, 10)
        rdd_rows = loaded_features._jvmRdd.jrdd().count()
        self.assertEqual(rdd_rows, 10)
コード例 #46
0
    def test_filterByOverlappingRegion(self):
        """Filtering unsorted.sam by region ("chr2", 1, 400) leaves 1 row."""
        sam_path = self.resourceFile("unsorted.sam")
        context = ADAMContext(self.ss)

        alignments = context.loadAlignments(sam_path)

        region = ReferenceRegion("chr2", 1, 400)
        overlapping_rows = alignments.filterByOverlappingRegion(region).toDF().count()

        self.assertEqual(overlapping_rows, 1)
コード例 #47
0
    def test_save_unordered_sam(self):
        """Write unordered.sam back out as a single SAM file and compare.

        The saved temporary file is checked against the original input
        with checkFiles.
        """
        input_sam = self.resourceFile("unordered.sam")
        adam_context = ADAMContext(self.ss)

        loaded_reads = adam_context.loadAlignments(input_sam)
        saved_sam = self.tmpFile() + ".sam"
        loaded_reads.saveAsSam(saved_sam, asSingleFile=True)

        self.checkFiles(input_sam, saved_sam)
コード例 #48
0
ファイル: genotypeRdd_test.py プロジェクト: karenfeng/adam
    def test_vcf_sort_lex(self):
        """Lexicographically sort random.vcf and compare with sorted.lex.vcf.

        Genotypes are converted to variant contexts, sorted, and saved as a
        single VCF; the result is checked against the reference file from
        the 'adam-cli' module resources.
        """
        vcf_path = self.resourceFile("random.vcf")
        context = ADAMContext(self.ss)

        genotypes = context.loadGenotypes(vcf_path)

        output_path = self.tmpFile() + ".vcf"
        sorted_contexts = genotypes.toVariantContexts().sortLexicographically()
        sorted_contexts.saveAsVcf(output_path, asSingleFile=True)

        expected_path = self.resourceFile("sorted.lex.vcf", module='adam-cli')
        self.checkFiles(output_path, expected_path)
コード例 #49
0
ファイル: coverageRdd_test.py プロジェクト: karenfeng/adam
    def test_save(self):
        """Saving sorted.sam coverage must leave a non-empty output directory.

        Coverage is written with asSingleFile and disableFastConcat both
        enabled, then the output path is listed.
        """
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        tmpPath = self.tmpFile() + ".coverage.adam"
        coverage.save(tmpPath,
                      asSingleFile=True,
                      disableFastConcat=True)
        # Indented with spaces only: mixing a tab into this line makes
        # Python 3 reject the file with a TabError.
        assert os.listdir(tmpPath) != []
コード例 #50
0
ファイル: genotypeRdd_test.py プロジェクト: laserson/adam
    def test_vcf_sort(self):
        """Sort random.vcf and compare the saved output with sorted.vcf.

        Genotypes are converted with toVariantContextRDD, sorted, and saved
        as a single VCF before the comparison.
        """
        vcf_path = self.resourceFile("random.vcf")
        context = ADAMContext(self.sc)

        genotypes = context.loadGenotypes(vcf_path)

        output_path = self.tmpFile() + ".vcf"
        sorted_contexts = genotypes.toVariantContextRDD().sort()
        sorted_contexts.saveAsVcf(output_path, asSingleFile=True)

        expected_path = self.resourceFile("sorted.vcf")
        self.checkFiles(output_path, expected_path)
コード例 #51
0
    def test_filterByOverlappingRegions(self):
        """Filtering unsorted.sam by two regions leaves 2 rows.

        The regions queried are ("chr2", 1, 400) and ("3", 1, 100).
        """
        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        # Plain int literals: the Python 2 "L" long suffix is a syntax error
        # on Python 3, where int is already arbitrary precision.
        querys = [ReferenceRegion("chr2", 1, 400),
                  ReferenceRegion("3", 1, 100)]

        filtered = reads.filterByOverlappingRegions(querys)
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(filtered.toDF().count(), 2)
コード例 #52
0
    def test_union(self):
        """The union of sorted.sam and unordered.sam alignments has 13 rows."""
        sorted_path = self.resourceFile("sorted.sam")
        unordered_path = self.resourceFile("unordered.sam")
        context = ADAMContext(self.ss)

        sorted_reads = context.loadAlignments(sorted_path)
        unordered_reads = context.loadAlignments(unordered_path)

        # union() is given a list of the other datasets to merge in.
        combined_rows = sorted_reads.union([unordered_reads]).toDF().count()

        self.assertEqual(combined_rows, 13)
コード例 #53
0
ファイル: genotypeRdd_test.py プロジェクト: karenfeng/adam
    def test_vcf_add_filter(self):
        """Adding a "BAD" filter header line must show up in the saved VCF.

        small.vcf genotypes are converted to variant contexts, the header
        line is added, and the saved file is searched for the FILTER entry.
        """
        vcf_path = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        genotypes = context.loadGenotypes(vcf_path)

        output_path = self.tmpFile() + ".vcf"
        with_filter = genotypes.toVariantContexts().addFilterHeaderLine("BAD", "Bad variant.")
        with_filter.saveAsVcf(output_path)

        expected_line = '##FILTER=<ID=BAD,Description="Bad variant.">'
        self.check_for_line_in_file(output_path, expected_line)
コード例 #54
0
    def test_to_coverage(self):
        """Check coverage row counts for unsorted.sam with and without collapse.

        The default conversion is expected to give 42 rows; passing
        collapse=False is expected to give 46.
        """
        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        coverage = reads.toCoverage()
        self.assertEqual(coverage.toDF().count(), 42)

        coverage = reads.toCoverage(collapse=False)
        self.assertEqual(coverage.toDF().count(), 46)
コード例 #55
0
ファイル: genotypeRdd_test.py プロジェクト: karenfeng/adam
    def test_to_variants(self):
        """Check variant row counts for small.vcf with and without dedupe.

        The default conversion is expected to give 18 rows; passing
        dedupe=True is expected to give 6.
        """
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        variants = genotypes.toVariants()

        self.assertEqual(variants.toDF().count(), 18)

        variants = genotypes.toVariants(dedupe=True)

        self.assertEqual(variants.toDF().count(), 6)