Exemple #1
0
    def test_round_trip_gtf(self):
        """Save GTF features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        ac = ADAMContext(self.sc)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".gtf"
        features.save(tmpPath, asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
Exemple #2
0
    def test_round_trip_bed(self):
        """Save BED features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.sc)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".bed"
        features.save(tmpPath, asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
Exemple #3
0
    def test_round_trip_interval_list(self):
        """Save interval_list features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        ac = ADAMContext(self.sc)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".interval_list"
        features.save(tmpPath, asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
    def test_round_trip_bed(self):
        """Save BED features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".bed"
        features.save(tmpPath,
                      asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
    def test_round_trip_gtf(self):
        """Save GTF features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".gtf"
        features.save(tmpPath,
                      asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
    def test_round_trip_interval_list(self):
        """Save interval_list features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".interval_list"
        features.save(tmpPath,
                      asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
Exemple #7
0
    def test_round_trip_narrowPeak(self):
        """Save narrowPeak features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile(
            "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        ac = ADAMContext(self.sc)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".narrowPeak"
        features.save(tmpPath, asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
    def test_round_trip_narrowPeak(self):
        """Save narrowPeak features to a temporary file and reload them.

        The record count of the reloaded features must equal the record
        count of the features that were originally loaded.
        """
        testFile = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(testFile)
        tmpPath = self.tmpFile() + ".narrowPeak"
        features.save(tmpPath,
                      asSingleFile=True)

        # Reload from the file that was just written; reloading testFile
        # would compare the input with itself and never check what save()
        # actually wrote.
        savedFeatures = ac.loadFeatures(tmpPath)

        self.assertEqual(features._jvmRdd.jrdd().count(),
                         savedFeatures._jvmRdd.jrdd().count())
    def test_load_interval_list(self):
        """Load the interval_list resource and check its record count.

        The count is read through both the DataFrame view and the
        underlying JVM RDD; each is expected to be 369.
        """
        intervalListPath = self.resourceFile(
            "SeqCap_EZ_Exome_v3.hg19.interval_list")
        features = ADAMContext(self.ss).loadFeatures(intervalListPath)

        expectedCount = 369
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
    def test_load_gtf(self):
        """Load the GTF resource and check its record count.

        The count is read through both the DataFrame view and the
        underlying JVM RDD; each is expected to be 15.
        """
        gtfPath = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        features = ADAMContext(self.ss).loadFeatures(gtfPath)

        expectedCount = 15
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
Exemple #11
0
    def test_load_gtf(self):
        """Verify that loading the GTF resource yields 15 features.

        Both the DataFrame count and the JVM RDD count are asserted.
        """
        adamContext = ADAMContext.__call__(self.ss) if False else None
        gtfPath = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        adamContext = ADAMContext(self.ss)
        loaded = adamContext.loadFeatures(gtfPath)

        dataFrameCount = loaded.toDF().count()
        self.assertEqual(dataFrameCount, 15)

        rddCount = loaded._jvmRdd.jrdd().count()
        self.assertEqual(rddCount, 15)
Exemple #12
0
    def test_load_interval_list(self):
        """Verify that loading the interval_list resource yields 369 features.

        Both the DataFrame count and the JVM RDD count are asserted.
        """
        intervalListPath = self.resourceFile(
            "SeqCap_EZ_Exome_v3.hg19.interval_list")
        adamContext = ADAMContext(self.ss)
        loaded = adamContext.loadFeatures(intervalListPath)

        dataFrameCount = loaded.toDF().count()
        self.assertEqual(dataFrameCount, 369)

        rddCount = loaded._jvmRdd.jrdd().count()
        self.assertEqual(rddCount, 369)
Exemple #13
0
    def test_load_bed(self):
        """Verify that loading the BED resource yields 10 features.

        Both the DataFrame count and the JVM RDD count are asserted.
        """
        bedPath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        adamContext = ADAMContext(self.ss)
        loaded = adamContext.loadFeatures(bedPath)

        dataFrameCount = loaded.toDF().count()
        self.assertEqual(dataFrameCount, 10)

        rddCount = loaded._jvmRdd.jrdd().count()
        self.assertEqual(rddCount, 10)
    def test_load_bed(self):
        """Load the BED resource and check its record count.

        The count is read through both the DataFrame view and the
        underlying JVM RDD; each is expected to be 10.
        """
        bedPath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        features = ADAMContext(self.ss).loadFeatures(bedPath)

        expectedCount = 10
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
    def test_transform(self):
        """Filter loaded BED features with transform() and count the result.

        The transform keeps rows whose ``start`` is below 12613; the
        resulting DataFrame is expected to hold 6 rows.
        """
        bedPath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        loaded = ADAMContext(self.ss).loadFeatures(bedPath)

        filtered = loaded.transform(lambda df: df.filter(df.start < 12613))
        rowCount = filtered.toDF().count()

        self.assertEqual(rowCount, 6)
    def test_load_narrowPeak(self):
        """Load the narrowPeak resource and check its record count.

        The count is read through both the DataFrame view and the
        underlying JVM RDD; each is expected to be 10.
        """
        narrowPeakPath = self.resourceFile(
            "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        features = ADAMContext(self.ss).loadFeatures(narrowPeakPath)

        expectedCount = 10
        self.assertEqual(features.toDF().count(), expectedCount)
        self.assertEqual(features._jvmRdd.jrdd().count(), expectedCount)
Exemple #17
0
    def test_transform(self):
        """Filter loaded BED features with transform() and count the result.

        The transform keeps rows whose ``start`` is below 12613; the
        resulting DataFrame is expected to hold 6 rows.
        """
        featurePath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(featurePath)

        transformedFeatures = features.transform(lambda x: x.filter(x.start < 12613))

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(transformedFeatures.toDF().count(), 6)
Exemple #18
0
    def test_load_narrowPeak(self):
        """Verify that loading the narrowPeak resource yields 10 features.

        Both the DataFrame count and the JVM RDD count are asserted.
        """
        narrowPeakPath = self.resourceFile(
            "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        adamContext = ADAMContext(self.ss)
        loaded = adamContext.loadFeatures(narrowPeakPath)

        dataFrameCount = loaded.toDF().count()
        self.assertEqual(dataFrameCount, 10)

        rddCount = loaded._jvmRdd.jrdd().count()
        self.assertEqual(rddCount, 10)
    def test_shuffle_right_outer_join_groupBy_left(self):
        """Join alignments against features and count the joined rows.

        Calls rightOuterShuffleRegionJoinAndGroupByLeft on the loaded
        alignments with the loaded features; the joined DataFrame is
        expected to hold 21 rows.
        """
        samPath = self.resourceFile("small.1.sam")
        bedPath = self.resourceFile("small.1.bed")

        adamContext = ADAMContext(self.ss)
        alignments = adamContext.loadAlignments(samPath)
        features = adamContext.loadFeatures(bedPath)

        joined = alignments.rightOuterShuffleRegionJoinAndGroupByLeft(features)
        rowCount = joined.toDF().count()

        self.assertEqual(rowCount, 21)
    def test_shuffle_inner_join(self):
        """Join alignments against features and count the joined rows.

        Calls shuffleRegionJoin on the loaded alignments with the loaded
        features; the joined DataFrame is expected to hold 5 rows.
        """
        samPath = self.resourceFile("small.1.sam")
        bedPath = self.resourceFile("small.1.bed")

        adamContext = ADAMContext(self.ss)
        alignments = adamContext.loadAlignments(samPath)
        features = adamContext.loadFeatures(bedPath)

        joined = alignments.shuffleRegionJoin(features)
        rowCount = joined.toDF().count()

        self.assertEqual(rowCount, 5)
    def test_broadcast_right_outer_join(self):
        """Join alignments against features and count the joined rows.

        Calls rightOuterBroadcastRegionJoin on the loaded alignments with
        the loaded features; the joined DataFrame is expected to hold 6 rows.
        """
        samPath = self.resourceFile("small.1.sam")
        bedPath = self.resourceFile("small.1.bed")

        adamContext = ADAMContext(self.ss)
        alignments = adamContext.loadAlignments(samPath)
        features = adamContext.loadFeatures(bedPath)

        joined = alignments.rightOuterBroadcastRegionJoin(features)
        rowCount = joined.toDF().count()

        self.assertEqual(rowCount, 6)
Exemple #22
0
    def test_visualize_features(self):
        """Smoke test: call ViewFeatures on features loaded from a BED file.

        Nothing is asserted about the value ViewFeatures returns; the test
        passes as long as none of the calls raises.
        """
        vizRdd = GenomicVizRDD(self.ss)

        # Load the features to display.
        adamContext = ADAMContext(self.ss)
        bedPath = self.resourceFile("smalltest.bed")
        features = adamContext.loadFeatures(bedPath)

        # Region to view.
        contig, start, end = "chrM", 1, 2000

        vizRdd.ViewFeatures(features, contig, start, end)
Exemple #23
0
    def test_visualize_features(self):
        """Check that FeatureSummary.viewPileup returns a value for a region.

        Features are loaded from a BED resource, wrapped in a
        FeatureSummary, and viewPileup is called for chrM:1-2000; the
        result must not be None.
        """
        # load file
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("smalltest.bed")

        # read features
        features = ac.loadFeatures(testFile)

        featureViz = FeatureSummary(ac, features)

        contig = "chrM"
        start = 1
        end = 2000

        x = featureViz.viewPileup(contig, start, end)
        # Identity check: `!= None` would dispatch to the result's own
        # __ne__, which need not return a plain bool.
        assert x is not None