def test_bam_compare_filter_blacklist():
    """
    Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName

    Runs bam_comp.main() on the two filter BAM files with all of the above
    options and compares the resulting bedgraph line by line.
    """
    outfile = '/tmp/test_file_filter.bg'
    args = "-b1 {} -b2 {} --normalizeTo1x 1400 -p 1 -o {} -of bedgraph --samFlagInclude 512 " \
           "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \
           "--blackListFileName {}".format(BAMFILE_FILTER1, BAMFILE_FILTER2, outfile, BEDFILE_FILTER)
    args = args.split()
    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    # NOTE(review): no line covers 800-900; presumably that is the
    # blacklisted interval -- confirm against BEDFILE_FILTER.
    expected = [
        '3R\t0\t100\t0\n',
        '3R\t100\t150\t-0.220909\n',
        '3R\t150\t200\t-0.159356\n',
        '3R\t200\t250\t-0.0718929\n',
        '3R\t250\t300\t0.135883\n',
        '3R\t300\t350\t0.103093\n',
        '3R\t350\t400\t-0.0895516\n',
        '3R\t400\t450\t0.0308374\n',
        '3R\t450\t500\t0.0989418\n',
        '3R\t500\t550\t0.207044\n',
        '3R\t550\t600\t0.0198996\n',
        '3R\t600\t650\t-0.0957241\n',
        '3R\t650\t700\t0.00968255\n',
        '3R\t700\t750\t-0.040642\n',
        '3R\t750\t800\t-0.123451\n',
        '3R\t900\t950\t0.212545\n',
        '3R\t950\t1000\t0.199309\n',
        '3R\t1000\t1050\t0.167945\n',
        '3R\t1050\t1500\t0\n',
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_filter_blacklist():
    """
    Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName

    The bedgraph written by bam_comp.main() is read back and compared with
    the expected lines.
    """
    outfile = '/tmp/test_file_filter.bg'
    args = "-b1 {} -b2 {} --normalizeTo1x 1400 -p 1 -o {} -of bedgraph --samFlagInclude 512 " \
           "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \
           "--blackListFileName {}".format(BAMFILE_FILTER1, BAMFILE_FILTER2, outfile, BEDFILE_FILTER)
    args = args.split()
    bam_comp.main(args)

    # Read the output through a context manager so the file is always closed.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # NOTE(review): the jump from 800 to 900 below presumably reflects the
    # blacklisted region -- verify against BEDFILE_FILTER.
    expected = ['3R\t0\t100\t0\n', '3R\t100\t150\t-0.220909\n',
                '3R\t150\t200\t-0.159356\n', '3R\t200\t250\t-0.0718929\n',
                '3R\t250\t300\t0.135883\n', '3R\t300\t350\t0.103093\n',
                '3R\t350\t400\t-0.0895516\n', '3R\t400\t450\t0.0308374\n',
                '3R\t450\t500\t0.0989418\n', '3R\t500\t550\t0.207044\n',
                '3R\t550\t600\t0.0198996\n', '3R\t600\t650\t-0.0957241\n',
                '3R\t650\t700\t0.00968255\n', '3R\t700\t750\t-0.040642\n',
                '3R\t750\t800\t-0.123451\n', '3R\t900\t950\t0.212545\n',
                '3R\t950\t1000\t0.199309\n', '3R\t1000\t1050\t0.167945\n',
                '3R\t1050\t1500\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files():
    """
    Test with two different files

    Runs bam_comp.main() with ``--ratio subtract`` and ``--scaleFactors 1:1``
    on BAMFILE_A and BAMFILE_B and checks the bedgraph lines it writes.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the file object returned by open() was never closed.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t0\t50\t0.00\n', '3R\t50\t100\t-1.00\n', '3R\t100\t150\t0.00\n', '3R\t150\t200\t-1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_ZoverZ():
    """
    Ensure --skipZeroOverZero works in bamCompare

    The expected output has no line for 0-50, i.e. that bin is omitted
    when --skipZeroOverZero is given.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--skipZeroOverZero".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.584963\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_pseudocounts():
    """
    Test with different pseudocounts

    Passes ``--pseudocount 1 0`` to bam_comp.main() and checks the bedgraph
    output; the first bin is expected to be ``inf``.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--pseudocount 1 0".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t0\t50\tinf\n', '3R\t50\t100\t0\n', '3R\t100\t150\t1\n', '3R\t150\t200\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files():
    """
    Test with two different files

    Runs ``--operation subtract`` with ``--scaleFactors 1:1`` once on the
    BAM pair and once on the CRAM pair; both runs must write the same
    bedgraph lines.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split()

        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as handle:
            resp = handle.readlines()

        expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-1\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_extend():
    """
    Test read extension

    Runs bam_comp.main() with ``--extend 100`` and ``--ratio subtract`` on
    BAMFILE_A and BAMFILE_B and checks the bedgraph lines it writes.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --extend 100 --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t0\t100\t-1\n', '3R\t100\t150\t1\n', '3R\t150\t200\t-1\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_arguments():
    """
    Test minimal command line args for bamCompare.

    The ratio between the same file is taken, therefore, the expected value
    is 1.0 for all bins.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} " \
           "-o {} -p 1 --outFileFormat bedgraph --ratio ratio".format(BAMFILE_B, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the file object returned by open() was never closed.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t0\t200\t1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files_skipnas():
    """
    Test skipnas

    Compared to the previous tests, any region that does not have coverage
    (in either of the bam files) is not included in the bedgraph file.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the file object returned by open() was never closed.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    expected = ['3R\t100\t150\t0.00\n', '3R\t150\t200\t-1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_ZoverZ():
    """
    Ensure --skipZeroOverZero works in bamCompare

    With the flag set, the expected bedgraph starts at position 50; no
    line is written for the 0-50 bin.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--skipZeroOverZero".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # Read the output through a context manager so the file is always closed.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t50\t100\t-1\n',
        '3R\t100\t150\t0\n',
        '3R\t150\t200\t-0.584963\n',
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files_skipnas():
    """
    Test skipnas

    Compared to the previous tests, any region that does not have coverage
    (in either of the bam files) is not included in the bedgraph file.
    The check is run once on the BAM pair and once on the CRAM pair.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(A, B, outfile).split()

        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as handle:
            resp = handle.readlines()

        expected = ['3R\t100\t150\t0\n', '3R\t150\t200\t-1\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_pseudocounts():
    """
    Test with different pseudocounts

    ``--pseudocount 1 0`` is passed to bam_comp.main(); the first bin of the
    resulting bedgraph is expected to be ``inf``.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--pseudocount 1 0".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # Read the output through a context manager so the file is always closed.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t0\t50\tinf\n',
        '3R\t50\t100\t0\n',
        '3R\t100\t150\t1\n',
        '3R\t150\t200\t0\n',
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_arguments():
    """
    Test minimal command line args for bamCompare.

    The ratio between the same file is taken, therefore, the expected value
    is 1 for all bins. The check is run once with BAMFILE_B and once with
    CRAMFILE_B.
    """
    outfile = '/tmp/test_file.bg'
    for fname in [BAMFILE_B, CRAMFILE_B]:
        args = "--bamfile1 {} --bamfile2 {} " \
               "-o {} -p 1 --outFileFormat bedgraph --operation ratio".format(fname, fname, outfile).split()

        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as handle:
            resp = handle.readlines()

        expected = ['3R\t0\t200\t1\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    Runs ``--ratio subtract`` with ``--normalizeTo1x 200`` and checks the
    bedgraph lines written by bam_comp.main().
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio subtract --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph --normalizeTo1x 200".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [1, 0.5] because BAMFILE_B has double the amount
    # of reads (4) compared to BAMFILE_A.

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    # The distribution of reads for the bam file is (schematic):
    #
    #               0                             100                           200
    #               |------------------------------------------------------------|
    # testA.bam 3R                                 ==============>
    #                                                             <==============
    #
    # testB.bam 3R                  <==============               ==============>
    #                                              ==============>
    #                                                             ==============>
    # ------------------------------------------------------------------------------
    #
    # subtract: scale factors [1,0.5]; after applying normalize to 1x, the
    # coverage of test_A is 0.5, thus the factor to reach a coverage of 1
    # is x2. Thus, the final scale factors are [2,1].
    #
    # after applying factors:    0    -1    1    0
    expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t1\n', '3R\t150\t200\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files():
    """
    Test with two different files

    The second file is subtracted from the first (``--operation subtract``,
    ``--scaleFactors 1:1``) for the BAM pair and then for the CRAM pair;
    each run must produce the same bedgraph lines.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split()

        bam_comp.main(args)

        # Read the output through a context manager so the file is always closed.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = [
            '3R\t0\t50\t0\n',
            '3R\t50\t100\t-1\n',
            '3R\t100\t150\t0\n',
            '3R\t150\t200\t-1\n',
        ]
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    Runs ``--operation subtract`` with ``--scaleFactorsMethod None`` and
    ``--normalizeUsing CPM`` on the BAM pair and on the CRAM pair and checks
    the bedgraph lines written by bam_comp.main().
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --operation subtract --ignoreForNormalization chr_cigar " \
               "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM".format(A, B, outfile).split()

        bam_comp.main(args)

        # BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A,
        # so its CPM factor is half as large.

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as handle:
            resp = handle.readlines()

        # The distribution of reads for the bam file is (schematic):
        #
        #               0                             100                           200
        #               |------------------------------------------------------------|
        # testA.bam 3R                                 ==============>
        #                                                             <==============
        #
        # testB.bam 3R                  <==============               ==============>
        #                                              ==============>
        #                                                             ==============>
        # ------------------------------------------------------------------------------
        #
        # subtract: After applying CPM normalization, the scale factors are
        # [500000,250000].
        #
        # after applying factors:    0    -250k    250k    0
        expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-250000\n', '3R\t100\t150\t250000\n', '3R\t150\t200\t0\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    ``--ratio subtract`` is combined with ``--normalizeTo1x 200``; the
    bedgraph written by bam_comp.main() is compared with the expected lines.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio subtract --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph --normalizeTo1x 200".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [1, 0.5] because BAMFILE_B has double the amount
    # of reads (4) compared to BAMFILE_A.

    # Read the output through a context manager so the file is always closed.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # The distribution of reads for the bam file is (schematic):
    #
    #               0                             100                           200
    #               |------------------------------------------------------------|
    # testA.bam 3R                                 ==============>
    #                                                             <==============
    #
    # testB.bam 3R                  <==============               ==============>
    #                                              ==============>
    #                                                             ==============>
    # ------------------------------------------------------------------------------
    #
    # subtract: scale factors [1,0.5]; after applying normalize to 1x, the
    # coverage of test_A is 0.5, thus the factor to reach a coverage of 1
    # is x2. Thus, the final scale factors are [2,1].
    #
    # after applying factors:    0    -1    1    0
    expected = [
        '3R\t0\t50\t0\n',
        '3R\t50\t100\t-1\n',
        '3R\t100\t150\t1\n',
        '3R\t150\t200\t0\n',
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_scale_factors_ratio():
    """
    Test scale factor

    Runs ``--ratio ratio`` without explicit scale factors and checks the
    bedgraph lines written by bam_comp.main().
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio ratio --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [1, 0.5] because BAMFILE_B has double the amount
    # of reads (4) compared to BAMFILE_A.

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as handle:
        resp = handle.readlines()

    # The distribution of reads for the bam file is (schematic):
    #
    #               0                             100                           200
    #               |------------------------------------------------------------|
    # testA.bam 3R                                 ==============>
    #                                                             <==============
    #
    # testB.bam 3R                  <==============               ==============>
    #                                              ==============>
    #                                                             ==============>
    # ------------------------------------------------------------------------------
    #
    # ratio (scale factors [1,0.5]; the +1 terms are presumably the
    # default pseudocount):
    #     0-50:     (0+1)/(0*0.5+1) = 1
    #     50-100:   (0+1)/(1*0.5+1) = 0.67
    #     100-150:  (1+1)/(1+1*0.5) = 1.33
    #     150-200:  (1+1)/(1+2*0.5) = 1
    expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.666667\n', '3R\t100\t150\t1.33333\n', '3R\t150\t200\t1\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    ``--operation subtract`` is combined with ``--scaleFactorsMethod None``
    and ``--normalizeUsing CPM``; the check is run on the BAM pair and on
    the CRAM pair.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --operation subtract --ignoreForNormalization chr_cigar " \
               "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM".format(A, B, outfile).split()

        bam_comp.main(args)

        # BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A,
        # so its CPM factor is half as large.

        # Read the output through a context manager so the file is always closed.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        # The distribution of reads for the bam file is (schematic):
        #
        #               0                             100                           200
        #               |------------------------------------------------------------|
        # testA.bam 3R                                 ==============>
        #                                                             <==============
        #
        # testB.bam 3R                  <==============               ==============>
        #                                              ==============>
        #                                                             ==============>
        # ------------------------------------------------------------------------------
        #
        # subtract: After applying CPM normalization, the scale factors are
        # [500000,250000].
        #
        # after applying factors:    0    -250k    250k    0
        expected = [
            '3R\t0\t50\t0\n',
            '3R\t50\t100\t-250000\n',
            '3R\t100\t150\t250000\n',
            '3R\t150\t200\t0\n',
        ]
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_ratio():
    """
    Test scale factor

    ``--ratio ratio`` is used without explicit scale factors; the bedgraph
    written by bam_comp.main() is compared with the expected lines.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio ratio --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [1, 0.5] because BAMFILE_B has double the amount
    # of reads (4) compared to BAMFILE_A.

    # Read the output through a context manager so the file is always closed.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # The distribution of reads for the bam file is (schematic):
    #
    #               0                             100                           200
    #               |------------------------------------------------------------|
    # testA.bam 3R                                 ==============>
    #                                                             <==============
    #
    # testB.bam 3R                  <==============               ==============>
    #                                              ==============>
    #                                                             ==============>
    # ------------------------------------------------------------------------------
    #
    # ratio (scale factors [1,0.5]; the +1 terms are presumably the
    # default pseudocount):
    #     0-50:     (0+1)/(0*0.5+1) = 1
    #     50-100:   (0+1)/(1*0.5+1) = 0.67
    #     100-150:  (1+1)/(1+1*0.5) = 1.33
    #     150-200:  (1+1)/(1+2*0.5) = 1
    expected = [
        '3R\t0\t50\t1\n',
        '3R\t50\t100\t0.666667\n',
        '3R\t100\t150\t1.33333\n',
        '3R\t150\t200\t1\n',
    ]
    assert_equal(resp, expected)
    unlink(outfile)