def test_no_alligned_reads_no_bc(self):
    """Smoke-test ``barcode_collapse`` on the ``no_mapped_reads.bam`` fixture.

    The call passes ``False`` as the third argument together with
    ``em=True``.  Nothing is asserted: the test passes as long as the call
    returns a pair that unpacks without raising.
    """
    source_bam = tests.get_file("no_mapped_reads.bam")
    collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")
    # The counts themselves are not inspected; unpacking only checks that
    # two values come back.
    _total, _removed = barcode_collapse.barcode_collapse(
        source_bam, collapsed_bam, False, em=True)
def test_get_five_prime_utr_sequences(self):
    """Tests 5' UTR dictionary creation.

    Calls ``UORF_detector._get_five_prime_utr_sequences`` with the
    ``gff_uorf_test`` GFF and FASTA fixtures, reduces every entry to
    ``(interval, sequence string)`` pairs, and compares the per-gene
    mapping against a hand-written expectation.
    """
    gff_file = pybedtools.BedTool(tests.get_file("gff_uorf_test.gff"))
    fa_file = tests.get_file("gff_uorf_test.fa")

    # One six-column record per line; the records are indexed below to
    # build the expected dictionary.
    intervals = pybedtools.BedTool("""
    chr1 5 10 ENSG1 0 +
    chr1 15 20 ENSG2 0 -
    chr1 25 30 ENSG3 0 +
    chr1 35 40 ENSG3 0 +
    chr1 45 50 ENSG4 0 -
    chr1 55 60 ENSG4 0 -
    """, from_string=True)

    detector = UORF_detector()
    raw_dict = detector._get_five_prime_utr_sequences(gff_file, fa_file)

    # Keep only the plain sequence string of each record so the result can
    # be compared with literals.  The loop variable used to be called
    # ``tuple``, which shadowed the builtin.
    test_dict = {
        name: [(interval, str(record.seq)) for interval, record in pairs]
        for name, pairs in raw_dict.items()
    }

    true_dict = {
        "ENSG1": [(intervals[0], "GGGGG")],
        "ENSG2": [(intervals[1], "AAAAA")],
        "ENSG3": [(intervals[2], "GGGGG"), (intervals[3], "TTTTT")],
        # The expectation lists ENSG4's records in reverse index order.
        "ENSG4": [(intervals[5], "GAAAA"), (intervals[4], "CCCCG")],
    }

    self.assertDictEqual(test_dict, true_dict)
def test_duplicate_pos(self):
    """Same start, second file holds a matching and a non-matching randomer.

    ``correlation`` is expected to report one read in total and one match.
    """
    first_bam = tests.get_file("test_cross_contamination/positive1.bam")
    second_bam = tests.get_file(
        "test_cross_contamination/positive_duplicate.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 1)
def test_randomer_match_neg2(self):
    """Same start and same randomer on the negative side, other offset.

    Compares ``negative2.bam`` with ``negative_match.bam`` and expects
    ``correlation`` to return one total read, which is matched.
    """
    first_bam = tests.get_file("test_cross_contamination/negative2.bam")
    second_bam = tests.get_file(
        "test_cross_contamination/negative_match.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 1)
def test_randomer_match_pos(self):
    """Same start and same randomer on the positive side.

    Compares ``positive1.bam`` with ``positive_match.bam`` and expects
    ``correlation`` to return one total read, which is matched.
    """
    first_bam = tests.get_file("test_cross_contamination/positive1.bam")
    second_bam = tests.get_file(
        "test_cross_contamination/positive_match.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 1)
def test_randomer_mismatch_neg(self):
    """Same start but a different randomer on the negative side.

    Compares ``negative1.bam`` with ``negative_mismatch.bam`` and expects
    ``correlation`` to return one total read and no match.
    """
    first_bam = tests.get_file("test_cross_contamination/negative1.bam")
    second_bam = tests.get_file(
        "test_cross_contamination/negative_mismatch.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 0)
def test_randomer_off_by_one_neg(self):
    """Guard against the off-by-one bug: same randomer, shifted by one.

    Compares ``negative1.bam`` with ``negative_off_by_one.bam`` and expects
    ``correlation`` to return one total read and no match.
    """
    first_bam = tests.get_file("test_cross_contamination/negative1.bam")
    second_bam = tests.get_file(
        "test_cross_contamination/negative_off_by_one.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 0)
def test_main(self):
    """Run ``single_RPKM.main`` and compare its output with expected rows.

    ``main`` receives the ``test_single_RPKM.count`` fixture and the path
    ``<test dir>/rpkm_test.rpkm``; the file is then read back through
    ``tests.get_file``.  Rows are compared token by token, so the amount
    of whitespace between columns does not matter.
    """
    rpkm_file = "rpkm_test.rpkm"
    single_RPKM.main(tests.get_file("test_single_RPKM.count"),
                     os.path.join(tests.get_test_dir(), rpkm_file))

    true_result = ["gene flag RPKM",
                   "ENSG1 0 5025125.62814",
                   "ENSG2 0 0.0"]

    # Use a context manager so the result file is closed even when an
    # assertion fails; the handle was previously left open.
    with open(tests.get_file(rpkm_file)) as observed:
        # zip() pairs rows in order and stops at the shorter sequence.
        for true, test in zip(true_result, observed):
            self.assertEqual(true.strip().split(), test.strip().split())
def test_pos_vs_neg(self):
    """Same read and same barcode, but on different strands.

    Compares ``positive1.bam`` with ``negative1.bam`` and expects
    ``correlation`` to return one total read and no match.
    """
    first_bam = tests.get_file("test_cross_contamination/positive1.bam")
    second_bam = tests.get_file("test_cross_contamination/negative1.bam")

    n_matched, n_total = correlation(first_bam, second_bam, "out.sam")

    self.assertEqual(n_total, 1)
    self.assertEqual(n_matched, 0)
def test_randomer_off_by_one_neg(self):
    """Off-by-one regression: identical randomer, position shifted by one.

    ``correlation`` on ``negative1.bam`` versus ``negative_off_by_one.bam``
    must count one read overall and zero matches.
    """
    fixture_a = tests.get_file("test_cross_contamination/negative1.bam")
    fixture_b = tests.get_file(
        "test_cross_contamination/negative_off_by_one.bam")

    counts = correlation(fixture_a, fixture_b, "out.sam")
    hits, overall = counts

    self.assertEqual(overall, 1)
    self.assertEqual(hits, 0)
def test_duplicate_pos(self):
    """Shared start where the second file has a matching and a mismatching randomer.

    ``correlation`` on ``positive1.bam`` versus ``positive_duplicate.bam``
    must count one read overall and one match.
    """
    fixture_a = tests.get_file("test_cross_contamination/positive1.bam")
    fixture_b = tests.get_file(
        "test_cross_contamination/positive_duplicate.bam")

    counts = correlation(fixture_a, fixture_b, "out.sam")
    hits, overall = counts

    self.assertEqual(overall, 1)
    self.assertEqual(hits, 1)
def test_main(self):
    """Check the table produced by ``single_RPKM.main``.

    The ``test_single_RPKM.count`` fixture is passed to ``main`` together
    with ``<test dir>/rpkm_test.rpkm``.  That file is read back via
    ``tests.get_file`` and each row is compared to the expectation after
    splitting on whitespace.
    """
    rpkm_file = "rpkm_test.rpkm"
    output_path = os.path.join(tests.get_test_dir(), rpkm_file)
    single_RPKM.main(tests.get_file("test_single_RPKM.count"), output_path)

    true_result = [
        "gene flag RPKM",
        "ENSG1 0 5025125.62814",
        "ENSG2 0 0.0",
    ]

    # The result file used to be opened without ever being closed; the
    # ``with`` block releases the handle even if an assertion fails.
    with open(tests.get_file(rpkm_file)) as result_handle:
        # Rows are paired in order; zip() stops at the shorter sequence.
        for true, test in zip(true_result, result_handle):
            self.assertEqual(true.strip().split(), test.strip().split())
def test_barcode_collapse_bacoded(self):
    """Tests duplicate removal for barcoded samples.

    Calls ``barcode_collapse`` with ``True`` as the third argument and
    checks the per-barcode total and removed counts it returns.
    """
    source_bam = tests.get_file("test_barcode_collapse.bam")
    collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

    observed = barcode_collapse.barcode_collapse(
        source_bam, collapsed_bam, True)
    observed_total, observed_removed = observed

    expected_total = {"AAGGGTCGC": 3, "AAGGGTCGT": 4}
    expected_removed = {"AAGGGTCGC": 1, "AAGGGTCGT": 2}

    self.assertDictEqual(expected_total, observed_total)
    self.assertDictEqual(expected_removed, observed_removed)
def test_count_gene(self):
    """Tests ``count_gene``: it must run and report the expected counts.

    A gene description with two regions is counted against ``test.bam``.
    For both returned entries the name, start and stop are checked exactly,
    while the gene and region counts may deviate by up to 3.
    """
    gene = {
        'start': 1,
        'stop': 500,
        'chrom': 'chr1',
        'strand': "+",
        "frea": "0",
        'raw_count': 0,
        'gene_id': 'ENSG1',
        "regions": [(1, 100), (399, 500)],
    }
    result = count_tags.count_gene(tests.get_file("test.bam"), gene, "none")

    # (name, expected region count, start, stop) for each returned entry.
    expectations = [
        ("ENSG1:1-100", 4, 1, 100),
        ("ENSG1:399-500", 8, 399, 500),
    ]
    for position, expected in enumerate(expectations):
        name, region_count, start, stop = expected
        self.assertEqual(result[position][0], name)
        self.assertAlmostEqual(
            result[position][1]['counts'].gene_count, 12, delta=3)
        self.assertAlmostEqual(
            result[position][1]['counts'].region_count, region_count, delta=3)
        self.assertEqual(result[position][1]['start'], start)
        self.assertEqual(result[position][1]['stop'], stop)
def test_count_to_regions(self):
    """Tests annotation building.

    ``count_to_regions`` is run on ``count_tags_annotation.bed`` and its
    result, converted to a plain ``dict``, must equal the expected
    per-gene descriptions.
    """
    result = count_tags.count_to_regions(
        tests.get_file("count_tags_annotation.bed"))

    ensg1 = {
        'start': 1,
        'stop': 300,
        'chrom': 'chr1',
        'strand': "+",
        "frea": "0",
        'gene_id': 'ENSG1',
        "regions": [(1, 100), (200, 300)],
    }
    ensg2 = {
        'start': 400,
        'stop': 700,
        'chrom': 'chr1',
        'strand': "-",
        "frea": "0",
        'gene_id': 'ENSG2',
        # The expectation lists the minus-strand regions from high to low.
        "regions": [(600, 700), (400, 500)],
    }
    true_result = {"ENSG1": ensg1, "ENSG2": ensg2}

    self.assertDictEqual(true_result, dict(result))
def test_submit_oldsplice(self):
    """Check the PBS script written by ``OldspliceSubmitter``.

    The submitter is built from ``sample_info.txt`` with ``submit=False``
    and ``queue_type='PBS'``, writing to ``<self.out_dir>/runOldsplice.sh``.
    The script is compared with the expected text line by line after
    splitting on whitespace, so spacing differences are ignored.
    """
    true_result = """#!/bin/bash
#PBS -N oldsplice
#PBS -o test_output/runOldsplice.sh.out
#PBS -e test_output/runOldsplice.sh.err
#PBS -V
#PBS -l walltime=0:30:00
#PBS -l nodes=1:ppn=16
#PBS -A yeo-group
#PBS -q home
#PBS -t 1-4%1000

# Go to the directory from which the script was called
cd $PBS_O_WORKDIR
cmd[1]="oldsplice.py -b test1.bam -s hg19 -o test1.splices --splice_type SE --splice_type MXE --processors 16"
cmd[2]="oldsplice.py -f -b test1.bam -s hg19 -o test1.flip.splices --splice_type SE --splice_type MXE --processors 16"
cmd[3]="oldsplice.py -b test2.bam -s hg19 -o test2.splices --splice_type SE --splice_type MXE --processors 16"
cmd[4]="oldsplice.py -f -b test2.bam -s hg19 -o test2.flip.splices --splice_type SE --splice_type MXE --processors 16"
eval ${cmd[$PBS_ARRAYID]}
"""
    true_result = true_result.split('\n')

    out_sh = '{}/runOldsplice.sh'.format(self.out_dir)
    OldspliceSubmitter(tests.get_file('sample_info.txt'),
                       submit=False,
                       queue_type='PBS',
                       out_sh=out_sh)

    # Read the script inside a ``with`` block so the handle is closed even
    # when an assertion fails; it was previously left open.
    with open(out_sh) as script:
        # Lines are paired in order; zip() stops at the shorter sequence.
        for true, test in zip(true_result, script):
            self.assertEqual(true.strip().split(), test.strip().split())
def test_barcode_collapse_not_barcoded(self):
    """Tests duplicate removal for non-barcoded samples.

    Calls ``barcode_collapse`` with ``False`` as the third argument and
    checks the total and removed counts, both keyed by ``"total"``.
    """
    source_bam = tests.get_file("test_barcode_collapse.bam")
    collapsed_bam = tests.get_file("test_barcode_collapse.bc.bam")

    observed = barcode_collapse.barcode_collapse(
        source_bam, collapsed_bam, False)
    observed_total, observed_removed = observed

    expected_total = {"total": 7}
    expected_removed = {"total": 5}

    self.assertDictEqual(expected_total, observed_total)
    self.assertDictEqual(expected_removed, observed_removed)
def test_barcode_collapse_not_barcoded(self):
    """Duplicate removal without barcodes.

    With ``False`` as the third argument, ``barcode_collapse`` must return
    7 reads in total and 5 removed, each under the key ``"total"``.
    """
    bam_in = tests.get_file("test_barcode_collapse.bam")
    bam_out = tests.get_file("test_barcode_collapse.bc.bam")

    totals, removed = barcode_collapse.barcode_collapse(
        bam_in, bam_out, False)

    self.assertDictEqual({"total": 7}, totals)
    self.assertDictEqual({"total": 5}, removed)
def test_barcode_collapse_bacoded(self):
    """Duplicate removal with barcodes.

    With ``True`` as the third argument, ``barcode_collapse`` must return
    total and removed counts keyed by the two barcode sequences.
    """
    bam_in = tests.get_file("test_barcode_collapse.bam")
    bam_out = tests.get_file("test_barcode_collapse.bc.bam")

    totals, removed = barcode_collapse.barcode_collapse(
        bam_in, bam_out, True)

    self.assertDictEqual({"AAGGGTCGC": 3, "AAGGGTCGT": 4}, totals)
    self.assertDictEqual({"AAGGGTCGC": 1, "AAGGGTCGT": 2}, removed)
def test_count_to_regions(self):
    """Tests annotation building from ``count_tags_annotation.bed``.

    The mapping returned by ``count_to_regions`` is converted to a plain
    ``dict`` and compared with the two expected gene descriptions.
    """
    expected_genes = {
        "ENSG1": {
            'start': 1,
            'stop': 300,
            'chrom': 'chr1',
            'strand': "+",
            "frea": "0",
            'gene_id': 'ENSG1',
            "regions": [(1, 100), (200, 300)],
        },
        "ENSG2": {
            'start': 400,
            'stop': 700,
            'chrom': 'chr1',
            'strand': "-",
            "frea": "0",
            'gene_id': 'ENSG2',
            "regions": [(600, 700), (400, 500)],
        },
    }

    annotation_path = tests.get_file("count_tags_annotation.bed")
    observed_genes = count_tags.count_to_regions(annotation_path)

    self.assertDictEqual(expected_genes, dict(observed_genes))
def test_count_gene(self):
    """Tests ``count_gene``, making sure it runs and outputs proper counts.

    The gene passed in has two regions.  Each returned entry is checked
    for its name, its gene and region counts (within a delta of 3), and
    its start and stop coordinates.
    """
    gene_description = {
        'start': 1,
        'stop': 500,
        'chrom': 'chr1',
        'strand': "+",
        "frea": "0",
        'raw_count': 0,
        'gene_id': 'ENSG1',
        "regions": [(1, 100), (399, 500)],
    }
    bam_path = tests.get_file("test.bam")
    result = count_tags.count_gene(bam_path, gene_description, "none")

    # First region.
    self.assertEqual(result[0][0], "ENSG1:1-100")
    first_info = result[0][1]
    self.assertAlmostEqual(first_info['counts'].gene_count, 12, delta=3)
    self.assertAlmostEqual(first_info['counts'].region_count, 4, delta=3)
    self.assertEqual(first_info['start'], 1)
    self.assertEqual(first_info['stop'], 100)

    # Second region.
    self.assertEqual(result[1][0], "ENSG1:399-500")
    second_info = result[1][1]
    self.assertAlmostEqual(second_info['counts'].gene_count, 12, delta=3)
    self.assertAlmostEqual(second_info['counts'].region_count, 8, delta=3)
    self.assertEqual(second_info['start'], 399)
    self.assertEqual(second_info['stop'], 500)