Ejemplo n.º 1
0
 def test_no_alligned_reads_no_bc(self):
     """
     Smoke test: collapse "no_mapped_reads.bam" with the third argument
     False and em=True; passes if the call returns two values without
     raising.
     """
     source_bam = tests.get_file("no_mapped_reads.bam")
     collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")
     counts = barcode_collapse.barcode_collapse(source_bam, collapsed_bam,
                                                False, em=True)
     # Unpacking keeps the implicit check that exactly two values come back.
     total_count, removed_count = counts
Ejemplo n.º 2
0
    def test_get_five_prime_utr_sequences(self):
        """
        Tests 5' UTR dictionary creation.

        Runs UORF_detector._get_five_prime_utr_sequences on the
        "gff_uorf_test" GFF/FASTA fixtures, reduces every sequence record in
        the result to str(seq.seq), and compares the outcome with the
        expected per-gene lists of (interval, sequence) pairs.
        """

        gff_file = pybedtools.BedTool(tests.get_file("gff_uorf_test.gff"))
        fa_file = tests.get_file("gff_uorf_test.fa")
        intervals = pybedtools.BedTool("""  chr1    5    10    ENSG1    0    +
                                chr1    15    20    ENSG2    0    -
                                chr1    25    30    ENSG3    0    +
                                chr1    35    40    ENSG3    0    +
                                chr1    45    50    ENSG4    0    -
                                chr1    55    60    ENSG4    0    -

                              """, from_string=True)
        detector = UORF_detector()
        test_dict = detector._get_five_prime_utr_sequences(gff_file, fa_file)
        # Reduce each sequence record to a plain string so it can be compared
        # with the literals below. The loop variable is called "pairs" so the
        # builtin ``tuple`` is not shadowed.
        test_dict = {name: [(interval, str(seq.seq)) for interval, seq in pairs]
                     for name, pairs in test_dict.items()}
        true_dict = {
                       "ENSG1" : [(intervals[0], "GGGGG")],
                       "ENSG2" : [(intervals[1], "AAAAA")],
                       "ENSG3" : [(intervals[2], "GGGGG"), (intervals[3], "TTTTT")],
                       # ENSG4 lists the later interval (index 5) first.
                       "ENSG4" : [(intervals[5], "GAAAA"), (intervals[4], "CCCCG")],
                       }

        self.assertDictEqual(test_dict, true_dict)
Ejemplo n.º 3
0
    def test_duplicate_pos(self):
        """
        Same start position, where the target holds both a matching and a
        non-matching randomer at that location.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/positive_duplicate.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 4
0
    def test_randomer_match_neg2(self):
        """
        Negative case with the same start and the same randomer, using the
        other offset.
        """
        first_bam = tests.get_file("test_cross_contamination/negative2.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/negative_match.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 5
0
    def test_randomer_match_pos(self):
        """
        Positive case with the same start and the same randomer.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/positive_match.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 6
0
    def test_randomer_mismatch_neg(self):
        """
        Negative case with the same start but a different randomer.
        """
        first_bam = tests.get_file("test_cross_contamination/negative1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/negative_mismatch.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 7
0
    def test_randomer_off_by_one_neg(self):
        """
        Guards against the off-by-one bug: negative case with the same
        randomer but a position that is off by one.
        """
        first_bam = tests.get_file("test_cross_contamination/negative1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/negative_off_by_one.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 8
0
 def test_main(self):
     """
     Runs single_RPKM.main on "test_single_RPKM.count" and compares the
     RPKM file it writes with the expected rows, field by field
     (whitespace-insensitive).
     """
     rpkm_file = "rpkm_test.rpkm"
     single_RPKM.main(tests.get_file("test_single_RPKM.count"),
                      os.path.join(tests.get_test_dir(), rpkm_file))

     true_result = ["gene    flag    RPKM",
                    "ENSG1    0    5025125.62814",
                    "ENSG2    0    0.0"]

     # The context manager closes the output file even when an assertion
     # fails; the original left the handle open.
     # NOTE(review): zip stops at the shorter input, so lines missing from
     # the output file are not detected here.
     with open(tests.get_file(rpkm_file)) as rpkm_handle:
         for true, test in zip(true_result, rpkm_handle):
             self.assertEqual(true.strip().split(), test.strip().split())
Ejemplo n.º 9
0
    def test_randomer_match_neg2(self):
        """
        Negative case with the same start and the same randomer, using the
        other offset.
        """
        first_bam = tests.get_file("test_cross_contamination/negative2.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/negative_match.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 10
0
    def test_pos_vs_neg(self):
        """
        Same read with the same barcode but on a different strand.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file("test_cross_contamination/negative1.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 11
0
    def test_randomer_mismatch_neg(self):
        """
        Negative case with the same start but a different randomer.
        """
        first_bam = tests.get_file("test_cross_contamination/negative1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/negative_mismatch.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 12
0
    def test_randomer_match_pos(self):
        """
        Positive case with the same start and the same randomer.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file(
            "test_cross_contamination/positive_match.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 13
0
    def test_pos_vs_neg(self):
        """
        Same read with the same barcode but on a different strand.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file("test_cross_contamination/negative1.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 14
0
    def test_randomer_off_by_one_neg(self):
        """
        Guards against the off-by-one bug: negative case with the same
        randomer but a position that is off by one.
        """
        first_bam = tests.get_file("test_cross_contamination/negative1.bam")
        second_bam = tests.get_file("test_cross_contamination/negative_off_by_one.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 0)
Ejemplo n.º 15
0
    def test_duplicate_pos(self):
        """
        Same start position, where the target holds both a matching and a
        non-matching randomer at that location.
        """
        first_bam = tests.get_file("test_cross_contamination/positive1.bam")
        second_bam = tests.get_file("test_cross_contamination/positive_duplicate.bam")

        outcome = correlation(first_bam, second_bam, "out.sam")
        matched, total = outcome

        self.assertEqual(total, 1)
        self.assertEqual(matched, 1)
Ejemplo n.º 16
0
    def test_main(self):
        """
        Runs single_RPKM.main on "test_single_RPKM.count" and compares the
        RPKM file it writes with the expected rows, field by field
        (whitespace-insensitive).
        """
        rpkm_file = "rpkm_test.rpkm"
        single_RPKM.main(tests.get_file("test_single_RPKM.count"),
                         os.path.join(tests.get_test_dir(), rpkm_file))

        true_result = [
            "gene    flag    RPKM", "ENSG1    0    5025125.62814",
            "ENSG2    0    0.0"
        ]

        # The context manager closes the output file even when an assertion
        # fails; the original left the handle open.
        # NOTE(review): zip stops at the shorter input, so lines missing from
        # the output file are not detected here.
        with open(tests.get_file(rpkm_file)) as rpkm_handle:
            for true, test in zip(true_result, rpkm_handle):
                self.assertEqual(true.strip().split(), test.strip().split())
Ejemplo n.º 17
0
    def test_barcode_collapse_bacoded(self):
        """
        Checks duplicate removal for barcoded samples: the per-barcode total
        and removed counts must equal the expected dictionaries.
        """
        source_bam = tests.get_file("test_barcode_collapse.bam")
        collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

        counts = barcode_collapse.barcode_collapse(source_bam, collapsed_bam, True)
        total_count, removed_count = counts

        expected_total = {
            "AAGGGTCGC": 3,
            "AAGGGTCGT": 4,
        }
        expected_removed = {
            "AAGGGTCGC": 1,
            "AAGGGTCGT": 2,
        }

        self.assertDictEqual(expected_total, total_count)
        self.assertDictEqual(expected_removed, removed_count)
Ejemplo n.º 18
0
    def test_count_gene(self):
        """
        Runs count_gene on "test.bam" for a gene with two regions and checks
        each region's name, coordinates and (within a tolerance of 3) its
        gene and region counts.
        """
        bam_path = tests.get_file("test.bam")
        gene_record = {
            'start': 1, 'stop': 500,
            'chrom': 'chr1', 'strand': "+",
            "frea": "0", 'raw_count': 0, 'gene_id': 'ENSG1',
            "regions": [(1, 100), (399, 500)],
        }
        result = count_tags.count_gene(bam_path, gene_record, "none")

        # First region: 1-100.
        first_region = result[0]
        self.assertEqual(first_region[0], "ENSG1:1-100")
        first_info = first_region[1]
        self.assertAlmostEqual(first_info['counts'].gene_count, 12, delta=3)
        self.assertAlmostEqual(first_info['counts'].region_count, 4, delta=3)
        self.assertEqual(first_info['start'], 1)
        self.assertEqual(first_info['stop'], 100)

        # Second region: 399-500.
        second_region = result[1]
        self.assertEqual(second_region[0], "ENSG1:399-500")
        second_info = second_region[1]
        self.assertAlmostEqual(second_info['counts'].gene_count, 12, delta=3)
        self.assertAlmostEqual(second_info['counts'].region_count, 8, delta=3)
        self.assertEqual(second_info['start'], 399)
        self.assertEqual(second_info['stop'], 500)
Ejemplo n.º 19
0
    def test_count_to_regions(self):
        """
        Tests annotation building: count_to_regions on
        "count_tags_annotation.bed" must yield the expected per-gene records.
        """
        annotation_path = tests.get_file("count_tags_annotation.bed")
        result = count_tags.count_to_regions(annotation_path)

        expected_ensg1 = {'start': 1, 'stop': 300,
                          'chrom': 'chr1', 'strand': "+",
                          "frea": "0", 'gene_id': 'ENSG1',
                          "regions": [(1, 100), (200, 300)]}
        expected_ensg2 = {'start': 400, 'stop': 700,
                          'chrom': 'chr1', 'strand': "-",
                          "frea": "0", 'gene_id': 'ENSG2',
                          "regions": [(600, 700), (400, 500)]}
        true_result = {"ENSG1": expected_ensg1, "ENSG2": expected_ensg2}

        # The result is converted to a plain dict before the comparison.
        self.assertDictEqual(true_result, dict(result))
Ejemplo n.º 20
0
    def test_submit_oldsplice(self):
        true_result = """#!/bin/bash
#PBS -N oldsplice
#PBS -o test_output/runOldsplice.sh.out
#PBS -e test_output/runOldsplice.sh.err
#PBS -V
#PBS -l walltime=0:30:00
#PBS -l nodes=1:ppn=16
#PBS -A yeo-group
#PBS -q home
#PBS -t 1-4%1000

# Go to the directory from which the script was called
cd $PBS_O_WORKDIR
cmd[1]="oldsplice.py -b test1.bam -s hg19 -o test1.splices --splice_type SE --splice_type MXE --processors 16"
cmd[2]="oldsplice.py -f -b test1.bam -s hg19 -o test1.flip.splices --splice_type SE --splice_type MXE --processors 16"
cmd[3]="oldsplice.py -b test2.bam -s hg19 -o test2.splices --splice_type SE --splice_type MXE --processors 16"
cmd[4]="oldsplice.py -f -b test2.bam -s hg19 -o test2.flip.splices --splice_type SE --splice_type MXE --processors 16"
eval ${cmd[$PBS_ARRAYID]}
"""
        true_result = true_result.split('\n')

        out_sh = '{}/runOldsplice.sh'.format(self.out_dir)
        OldspliceSubmitter(tests.get_file('sample_info.txt'),
                           submit=False,
                           queue_type='PBS',
                           out_sh=out_sh)
        for true, test in zip(true_result, open(out_sh)):
            self.assertEqual(true.strip().split(), test.strip().split())
Ejemplo n.º 21
0
    def test_barcode_collapse_not_barcoded(self):
        """
        Checks duplicate removal for non-barcoded samples: counts are
        reported under the single key "total".
        """
        source_bam = tests.get_file("test_barcode_collapse.bam")
        collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

        counts = barcode_collapse.barcode_collapse(source_bam, collapsed_bam,
                                                   False)
        total_count, removed_count = counts

        expected_total = {"total": 7}
        expected_removed = {"total": 5}

        self.assertDictEqual(expected_total, total_count)
        self.assertDictEqual(expected_removed, removed_count)
Ejemplo n.º 22
0
    def test_submit_oldsplice(self):
        true_result = """#!/bin/bash
#PBS -N oldsplice
#PBS -o test_output/runOldsplice.sh.out
#PBS -e test_output/runOldsplice.sh.err
#PBS -V
#PBS -l walltime=0:30:00
#PBS -l nodes=1:ppn=16
#PBS -A yeo-group
#PBS -q home
#PBS -t 1-4%1000

# Go to the directory from which the script was called
cd $PBS_O_WORKDIR
cmd[1]="oldsplice.py -b test1.bam -s hg19 -o test1.splices --splice_type SE --splice_type MXE --processors 16"
cmd[2]="oldsplice.py -f -b test1.bam -s hg19 -o test1.flip.splices --splice_type SE --splice_type MXE --processors 16"
cmd[3]="oldsplice.py -b test2.bam -s hg19 -o test2.splices --splice_type SE --splice_type MXE --processors 16"
cmd[4]="oldsplice.py -f -b test2.bam -s hg19 -o test2.flip.splices --splice_type SE --splice_type MXE --processors 16"
eval ${cmd[$PBS_ARRAYID]}
"""
        true_result = true_result.split('\n')

        out_sh = '{}/runOldsplice.sh'.format(self.out_dir)
        OldspliceSubmitter(tests.get_file('sample_info.txt'), submit=False,
                           queue_type='PBS', out_sh=out_sh)
        for true, test in zip(true_result, open(out_sh)):
            self.assertEqual(true.strip().split(), test.strip().split())
Ejemplo n.º 23
0
    def test_barcode_collapse_not_barcoded(self):
        """
        Checks duplicate removal for non-barcoded samples: counts are
        reported under the single key "total".
        """
        source_bam = tests.get_file("test_barcode_collapse.bam")
        collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

        counts = barcode_collapse.barcode_collapse(source_bam, collapsed_bam, False)
        total_count, removed_count = counts

        expected_total = {"total": 7}
        expected_removed = {"total": 5}

        self.assertDictEqual(expected_total, total_count)
        self.assertDictEqual(expected_removed, removed_count)
Ejemplo n.º 24
0
    def test_barcode_collapse_bacoded(self):
        """
        Checks duplicate removal for barcoded samples: the per-barcode total
        and removed counts must equal the expected dictionaries.
        """
        source_bam = tests.get_file("test_barcode_collapse.bam")
        collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

        counts = barcode_collapse.barcode_collapse(source_bam, collapsed_bam,
                                                   True)
        total_count, removed_count = counts

        expected_total = {"AAGGGTCGC": 3, "AAGGGTCGT": 4}
        expected_removed = {"AAGGGTCGC": 1, "AAGGGTCGT": 2}

        self.assertDictEqual(expected_total, total_count)
        self.assertDictEqual(expected_removed, removed_count)
Ejemplo n.º 25
0
    def test_count_to_regions(self):
        """
        Tests annotation building: count_to_regions on
        "count_tags_annotation.bed" must yield the expected per-gene records.
        """
        annotation_path = tests.get_file("count_tags_annotation.bed")
        result = count_tags.count_to_regions(annotation_path)

        expected_ensg1 = {
            'start': 1,
            'stop': 300,
            'chrom': 'chr1',
            'strand': "+",
            "frea": "0",
            'gene_id': 'ENSG1',
            "regions": [(1, 100), (200, 300)],
        }
        expected_ensg2 = {
            'start': 400,
            'stop': 700,
            'chrom': 'chr1',
            'strand': "-",
            "frea": "0",
            'gene_id': 'ENSG2',
            "regions": [(600, 700), (400, 500)],
        }
        true_result = {"ENSG1": expected_ensg1, "ENSG2": expected_ensg2}

        # The result is converted to a plain dict before the comparison.
        self.assertDictEqual(true_result, dict(result))
Ejemplo n.º 26
0
 def test_count_gene(self):
     """
     Runs count_gene on "test.bam" for a gene with two regions and checks
     each region's name, coordinates and (within a tolerance of 3) its
     gene and region counts.
     """
     bam_path = tests.get_file("test.bam")
     gene_record = {
         'start': 1,
         'stop': 500,
         'chrom': 'chr1',
         'strand': "+",
         "frea": "0",
         'raw_count': 0,
         'gene_id': 'ENSG1',
         "regions": [(1, 100), (399, 500)],
     }
     result = count_tags.count_gene(bam_path, gene_record, "none")

     # First region: 1-100.
     first_region = result[0]
     self.assertEqual(first_region[0], "ENSG1:1-100")
     first_info = first_region[1]
     self.assertAlmostEqual(first_info['counts'].gene_count, 12, delta=3)
     self.assertAlmostEqual(first_info['counts'].region_count, 4, delta=3)
     self.assertEqual(first_info['start'], 1)
     self.assertEqual(first_info['stop'], 100)

     # Second region: 399-500.
     second_region = result[1]
     self.assertEqual(second_region[0], "ENSG1:399-500")
     second_info = second_region[1]
     self.assertAlmostEqual(second_info['counts'].gene_count, 12, delta=3)
     self.assertAlmostEqual(second_info['counts'].region_count, 8, delta=3)
     self.assertEqual(second_info['start'], 399)
     self.assertEqual(second_info['stop'], 500)
Ejemplo n.º 27
0
 def test_no_alligned_reads_no_bc(self):
     """
     Smoke test: collapse "no_mapped_reads.bam" with the third argument
     False and em=True; passes if the call returns two values without
     raising.
     """
     source_bam = tests.get_file("no_mapped_reads.bam")
     collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")
     counts = barcode_collapse.barcode_collapse(
         source_bam, collapsed_bam, False, em=True)
     # Unpacking keeps the implicit check that exactly two values come back.
     total_count, removed_count = counts