예제 #1
0
    def test_cumulative_count_distribution(self):
        """
        Build a CoverageDistribution from small.sam and check the data returned
        by plotDistributions in both cumulative and non-cumulative mode.

        In each mode, the entry popped from the returned mapping must hold
        exactly one item whose second element is 1500.
        """
        context = ADAMContext(self.ss)
        samPath = self.resourceFile("small.sam")

        # alignments -> coverage -> distribution under test
        alignments = context.loadAlignments(samPath)
        cov = alignments.toCoverage()
        distribution = CoverageDistribution(self.ss, cov)

        # Same expectations with and without accumulation (never normalized).
        for isCumulative in (True, False):
            _, perSample = distribution.plotDistributions(testMode = True, cumulative = isCumulative, normalize = False)

            # Inspect the entry handed back by popitem().
            points = list(perSample.popitem()[1])
            assert len(points) == 1
            assert points.pop()[1] == 1500
예제 #2
0
 def __init__(self, sc, build='hg19'):
     """
     Set up the viz object.

     Wraps ``sc`` in an ADAMContext (stored as ``self.ac``), records the
     requested ``build`` (defaults to 'hg19') and sets ``self.chrPrefix``
     to 'chr'.
     """
     self.ac = ADAMContext(sc)
     self.chrPrefix = 'chr'
     self.build = build
예제 #3
0
    def test_collapse(self):
        """
        Coverage computed from sorted.sam must report the same DataFrame row
        count before and after collapse().
        """
        samPath = self.resourceFile("sorted.sam")
        context = ADAMContext(self.sc)

        coverage = context.loadAlignments(samPath).toCoverage()
        collapsed = coverage.collapse()

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(collapsed.toDF().count(), coverage.toDF().count())
예제 #4
0
    def test_flatten(self):
        """
        Flattening the coverage computed from small.sam must give a DataFrame
        with 1500 rows.
        """
        samPath = self.resourceFile("small.sam")
        context = ADAMContext(self.sc)

        coverage = context.loadAlignments(samPath).toCoverage()
        flattened = coverage.flatten()

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(flattened.toDF().count(), 1500)
예제 #5
0
    def test_aggregatedCoverage(self):
        """
        Calling aggregatedCoverage(10) on the coverage computed from small.sam
        must give a DataFrame with 166 rows.
        """
        samPath = self.resourceFile("small.sam")
        context = ADAMContext(self.sc)

        coverage = context.loadAlignments(samPath).toCoverage()
        aggregated = coverage.aggregatedCoverage(10)

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(aggregated.toDF().count(), 166)
예제 #6
0
    def test_load_coverage(self):
        """
        loadCoverage on sample_coverage.bed must give a DataFrame with 3 rows.
        """
        bedPath = self.resourceFile("sample_coverage.bed")
        loaded = ADAMContext(self.sc).loadCoverage(bedPath)

        rowCount = loaded.toDF().count()
        self.assertEqual(rowCount, 3)
예제 #7
0
    def test_load_genotypes(self):
        """
        loadGenotypes on small.vcf must report 18 records, both through the
        DataFrame and through ``_jvmRdd.jrdd()``.
        """
        vcfPath = self.resourceFile("small.vcf")
        genotypes = ADAMContext(self.ss).loadGenotypes(vcfPath)

        expectedCount = 18
        self.assertEqual(genotypes.toDF().count(), expectedCount)
        self.assertEqual(genotypes._jvmRdd.jrdd().count(), expectedCount)
예제 #8
0
    def test_load_dna_sequences(self):
        """
        loadDnaSequences on the HLA FASTA file must report a single record,
        both through the DataFrame and through ``_jvmDataset.jrdd()``.
        """
        fastaPath = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        loaded = ADAMContext(self.ss).loadDnaSequences(fastaPath)

        expectedCount = 1
        self.assertEqual(loaded.toDF().count(), expectedCount)
        self.assertEqual(loaded._jvmDataset.jrdd().count(), expectedCount)
예제 #9
0
    def test_load_slices(self):
        """
        loadSlices on the HLA FASTA file, called with 10000 as second argument,
        must report a single record, both through the DataFrame and through
        ``_jvmDataset.jrdd()``.
        """
        fastaPath = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        loaded = ADAMContext(self.ss).loadSlices(fastaPath, 10000)

        expectedCount = 1
        self.assertEqual(loaded.toDF().count(), expectedCount)
        self.assertEqual(loaded._jvmDataset.jrdd().count(), expectedCount)
예제 #10
0
    def test_load_interval_list(self):
        """
        loadFeatures on the SeqCap interval_list file must report 369 records,
        both through the DataFrame and through ``_jvmRdd.jrdd()``.
        """
        intervalPath = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        features = ADAMContext(self.ss).loadFeatures(intervalPath)

        expectedCount = 369
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
예제 #11
0
    def test_to_fragments(self):
        """
        Converting the alignments loaded from unsorted.sam with toFragments()
        must give a DataFrame with 5 rows.
        """
        samPath = self.resourceFile("unsorted.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        fragmentCount = alignments.toFragments().toDF().count()
        self.assertEqual(fragmentCount, 5)
예제 #12
0
    def test_VariantsPerSampleDistribution(self):
        """
        The data returned by VariantsPerSampleDistribution.plotDistributions
        for genodata.v3.test.vcf must sum to the same total as the expected
        values. Only the totals are compared, not the individual entries.
        """
        context = ADAMContext(self.ss)
        vcfPath = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcfPath)
        distribution = VariantsPerSampleDistribution(self.ss, genotypes)
        _, data = distribution.plotDistributions(testMode= True)

        expectedTotal = sum([6, 8, 8, 1, 7, 8])
        assert sum(data) == expectedTotal
예제 #13
0
    def test_load_bed(self):
        """
        loadFeatures on the truncated gencode BED file must report 10 records,
        both through the DataFrame and through ``_jvmRdd.jrdd()``.
        """
        bedPath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        features = ADAMContext(self.ss).loadFeatures(bedPath)

        expectedCount = 10
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
예제 #14
0
    def test_transform(self):
        """
        Filtering the genotypes loaded from random.vcf down to
        ``contigName == '1'`` via transform() must leave a DataFrame with
        9 rows.
        """
        vcfPath = self.resourceFile("random.vcf")
        context = ADAMContext(self.sc)

        genotypes = context.loadGenotypes(vcfPath)

        # The lambda is applied to the object transform() passes in; it keeps
        # only the rows whose contigName equals '1'.
        onContigOne = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(onContigOne.toDF().count(), 9)
예제 #15
0
    def test_load_gtf(self):
        """
        loadFeatures on the truncated GRCh37.75 GTF file must report 15
        records, both through the DataFrame and through ``_jvmRdd.jrdd()``.
        """
        gtfPath = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        features = ADAMContext(self.ss).loadFeatures(gtfPath)

        expectedCount = 15
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
예제 #16
0
    def test_load_alignments(self):
        """
        loadAlignments on small.sam must report 20 records, both through the
        DataFrame and through ``_jvmRdd.jrdd()``.
        """
        samPath = self.resourceFile("small.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        expectedCount = 20
        self.assertEqual(alignments.toDF().count(), expectedCount)
        self.assertEqual(alignments._jvmRdd.jrdd().count(), expectedCount)
예제 #17
0
    def test_load_variants(self):
        """
        loadVariants on small.vcf must report 6 records, both through the
        DataFrame and through ``_jvmRdd.jrdd()``.
        """
        vcfPath = self.resourceFile("small.vcf")
        variants = ADAMContext(self.sc).loadVariants(vcfPath)

        expectedCount = 6
        self.assertEqual(variants.toDF().count(), expectedCount)
        self.assertEqual(variants._jvmRdd.jrdd().count(), expectedCount)
예제 #18
0
    def test_count_kmers(self):
        """
        countKmers(6) on the alignments loaded from small.sam must return a
        result whose count() is 1040.
        """
        samPath = self.resourceFile("small.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        kmerCounts = alignments.countKmers(6)
        self.assertEqual(kmerCounts.count(), 1040)
예제 #19
0
    def test_load_contig_fragments(self):
        """
        loadContigFragments on the HLA FASTA file must report a single record,
        both through the DataFrame and through ``_jvmRdd.jrdd()``.
        """
        fastaPath = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        contigFragments = ADAMContext(self.sc).loadContigFragments(fastaPath)

        expectedCount = 1
        self.assertEqual(contigFragments.toDF().count(), expectedCount)
        self.assertEqual(contigFragments._jvmRdd.jrdd().count(), expectedCount)
예제 #20
0
    def test_load_indexed_bam(self):
        """
        loadIndexedBam on indexed_bams/sorted.bam, restricted to two
        ReferenceRegions, must give a DataFrame with 2 rows.
        """
        bamPath = self.resourceFile("indexed_bams/sorted.bam")
        context = ADAMContext(self.ss)

        # The two regions handed to the indexed load.
        regions = [
            ReferenceRegion("chr2", 100, 101),
            ReferenceRegion("3", 10, 17),
        ]
        alignments = context.loadIndexedBam(bamPath, regions)

        self.assertEqual(alignments.toDF().count(), 2)
예제 #21
0
    def test_GenotypeCallRatesDistribution(self):
        """
        The data returned by GenotypeCallRatesDistribution.plot for
        genodata.v3.test.vcf (sample=1.0) must, once sorted and rounded to two
        decimals, equal the sorted expected values.
        """
        context = ADAMContext(self.ss)
        vcfPath = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcfPath)
        distribution = GenotypeCallRatesDistribution(self.ss, genotypes, sample=1.0)
        _, data = distribution.plot(testMode= True)

        expected = sorted([0.95, 0.88, 0.89, 0.94, 0.93, 0.90])
        # Sort first, then round each value, so the comparison is order-independent.
        rounded = [round(rate, 2) for rate in sorted(data)]

        assert expected == rounded
예제 #22
0
    def test_transform(self):
        """
        Filtering the features loaded from the truncated gencode BED file down
        to ``start < 12613`` via transform() must leave a DataFrame with
        6 rows.
        """
        bedPath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        context = ADAMContext(self.ss)

        features = context.loadFeatures(bedPath)

        # The lambda is applied to the object transform() passes in; it keeps
        # only the rows whose start is below 12613.
        earlyFeatures = features.transform(lambda x: x.filter(x.start < 12613))

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(earlyFeatures.toDF().count(), 6)
예제 #23
0
    def test_transform(self):
        """
        Filtering the alignments loaded from unsorted.sam down to
        ``referenceName == "1"`` via transform() must leave a DataFrame with
        1 row.
        """
        samPath = self.resourceFile("unsorted.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        # Keep only the rows whose referenceName equals "1".
        onReferenceOne = alignments.transform(lambda x: x.filter(x.referenceName == "1"))

        remaining = onReferenceOne.toDF().count()
        self.assertEqual(remaining, 1)
예제 #24
0
    def test_transform(self):
        """
        Filtering the variants loaded from small.vcf down to ``start < 19190``
        via transform() must leave a DataFrame with 3 rows.
        """
        vcfPath = self.resourceFile("small.vcf")
        context = ADAMContext(self.ss)

        variants = context.loadVariants(vcfPath)

        # The lambda is applied to the object transform() passes in; it keeps
        # only the rows whose start is below 19190.
        earlyVariants = variants.transform(lambda x: x.filter(x.start < 19190))

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(earlyVariants.toDF().count(), 3)
예제 #25
0
    def test_save_unordered_sam(self):
        """
        Load unordered.sam, write it back with saveAsSam as a single file, and
        compare the written file with the input using checkFiles.
        """
        inputPath = self.resourceFile("unordered.sam")
        alignments = ADAMContext(self.ss).loadAlignments(inputPath)

        # Output goes to a fresh temp path with a .sam suffix.
        outputPath = self.tmpFile() + ".sam"
        alignments.saveAsSam(outputPath, asSingleFile=True)

        self.checkFiles(inputPath, outputPath)
예제 #26
0
    def test_HetHomRatioDistribution(self):
        """
        The data returned by HetHomRatioDistribution.plot for
        genodata.v3.test.vcf (sample=1.0) must, once sorted and rounded to two
        decimals, equal the sorted expected values.
        """
        context = ADAMContext(self.ss)
        vcfPath = self.resourceFile("genodata.v3.test.vcf")

        genotypes = context.loadGenotypes(vcfPath)
        distribution = HetHomRatioDistribution(self.ss, genotypes, sample=1.0)
        _, data = distribution.plot(testMode= True)

        expected = sorted([5.0, 0.6, 0.14, 0.17, 1.67])
        # Sort first, then round each value, so the comparison is order-independent.
        rounded = [round(ratio, 2) for ratio in sorted(data)]

        assert expected == rounded
예제 #27
0
    def test_load_narrowPeak(self):
        """
        loadFeatures on the truncated narrowPeak file must report 10 records,
        both through the DataFrame and through ``_jvmRdd.jrdd()``.
        """
        peakPath = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        features = ADAMContext(self.ss).loadFeatures(peakPath)

        expectedCount = 10
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
예제 #28
0
    def test_realignIndels_reads(self):
        """
        realignIndels() on the alignments loaded from small.1.sam must give a
        DataFrame with 20 rows.
        """
        samPath = self.resourceFile("small.1.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        realignedCount = alignments.realignIndels().toDF().count()
        self.assertEqual(realignedCount, 20)
예제 #29
0
    def test_toFeatures(self):
        """
        toFeatures() on the coverage computed from sorted.sam must return a
        FeatureRDD whose DataFrame has as many rows as the coverage DataFrame.
        """
        samPath = self.resourceFile("sorted.sam")
        context = ADAMContext(self.sc)

        coverage = context.loadAlignments(samPath).toCoverage()
        features = coverage.toFeatures()

        # assertIsInstance still runs under `python -O` and reports the actual
        # type on failure, unlike the bare assert it replaces.
        self.assertIsInstance(features, FeatureRDD)
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(features.toDF().count(), coverage.toDF().count())
예제 #30
0
    def test_save(self):
        """
        Save the coverage computed from sorted.sam to a temp path ending in
        ".coverage.adam" and check that listing that path gives at least one
        entry.
        """
        samPath = self.resourceFile("sorted.sam")
        alignments = ADAMContext(self.ss).loadAlignments(samPath)

        coverage = alignments.toCoverage()
        outputPath = self.tmpFile() + ".coverage.adam"
        coverage.save(outputPath, asSingleFile=True, disableFastConcat=True)

        writtenEntries = os.listdir(outputPath)
        assert writtenEntries != []