def test_bam_compare_filter_blacklist():
    """
    Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName

    Runs bam_comp.main on BAMFILE_FILTER1 / BAMFILE_FILTER2 with every
    filtering option enabled, writes a bedgraph file and compares it line by
    line with the expected intervals.
    """
    outfile = '/tmp/test_file_filter.bg'
    args = "-b1 {} -b2 {} --normalizeTo1x 1400 -p 1 -o {} -of bedgraph --samFlagInclude 512 " \
           "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \
           "--blackListFileName {}".format(BAMFILE_FILTER1, BAMFILE_FILTER2, outfile, BEDFILE_FILTER)
    bam_comp.main(args.split())

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # No interval covers 800-900 in the expected output (presumably the
    # region listed in BEDFILE_FILTER -- confirm against the fixture).
    expected = [
        '3R\t0\t100\t0\n', '3R\t100\t150\t-0.220909\n',
        '3R\t150\t200\t-0.159356\n', '3R\t200\t250\t-0.0718929\n',
        '3R\t250\t300\t0.135883\n', '3R\t300\t350\t0.103093\n',
        '3R\t350\t400\t-0.0895516\n', '3R\t400\t450\t0.0308374\n',
        '3R\t450\t500\t0.0989418\n', '3R\t500\t550\t0.207044\n',
        '3R\t550\t600\t0.0198996\n', '3R\t600\t650\t-0.0957241\n',
        '3R\t650\t700\t0.00968255\n', '3R\t700\t750\t-0.040642\n',
        '3R\t750\t800\t-0.123451\n', '3R\t900\t950\t0.212545\n',
        '3R\t950\t1000\t0.199309\n', '3R\t1000\t1050\t0.167945\n',
        '3R\t1050\t1500\t0\n'
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_filter_blacklist():
    """
    Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName

    The filtered comparison of BAMFILE_FILTER1 and BAMFILE_FILTER2 is written
    as bedgraph and must match the expected lines exactly.
    """
    outfile = '/tmp/test_file_filter.bg'
    args = "-b1 {} -b2 {} --normalizeTo1x 1400 -p 1 -o {} -of bedgraph --samFlagInclude 512 " \
           "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \
           "--blackListFileName {}".format(BAMFILE_FILTER1, BAMFILE_FILTER2, outfile, BEDFILE_FILTER)
    args = args.split()
    bam_comp.main(args)

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # The expected output has no line for 800-900 (presumably the
    # blacklisted interval -- confirm against BEDFILE_FILTER).
    expected = ['3R\t0\t100\t0\n', '3R\t100\t150\t-0.220909\n',
                '3R\t150\t200\t-0.159356\n', '3R\t200\t250\t-0.0718929\n',
                '3R\t250\t300\t0.135883\n', '3R\t300\t350\t0.103093\n',
                '3R\t350\t400\t-0.0895516\n', '3R\t400\t450\t0.0308374\n',
                '3R\t450\t500\t0.0989418\n', '3R\t500\t550\t0.207044\n',
                '3R\t550\t600\t0.0198996\n', '3R\t600\t650\t-0.0957241\n',
                '3R\t650\t700\t0.00968255\n', '3R\t700\t750\t-0.040642\n',
                '3R\t750\t800\t-0.123451\n', '3R\t900\t950\t0.212545\n',
                '3R\t950\t1000\t0.199309\n', '3R\t1000\t1050\t0.167945\n',
                '3R\t1050\t1500\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files():
    """
    Test with two different files

    Runs the subtract operation on BAMFILE_A and BAMFILE_B with fixed 1:1
    scale factors and checks the bedgraph lines that are written.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the handle returned by open() was never closed; the
    # context manager releases it before the file is unlinked.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t50\t0.00\n', '3R\t50\t100\t-1.00\n', '3R\t100\t150\t0.00\n', '3R\t150\t200\t-1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_ZoverZ():
    """
    Ensure --skipZeroOverZero works in bamCompare

    With the flag set, the expected bedgraph starts at position 50: no line
    is written for the 0-50 bin.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--skipZeroOverZero".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.584963\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_pseudocounts():
    """
    Test with different pseudocounts

    Passes "--pseudocount 1 0" (two different values) and checks the
    bedgraph output, whose first bin is expected to be "inf".
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--pseudocount 1 0".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t50\tinf\n', '3R\t50\t100\t0\n', '3R\t100\t150\t1\n', '3R\t150\t200\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files():
    """
    Test with two different files

    The same subtraction is run once on the BAM pair and once on the CRAM
    pair; both runs must produce the same bedgraph lines.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split()
        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-1\n']
        assert_equal(resp, expected)
        # Remove the output so the next iteration starts from a clean path.
        unlink(outfile)
def test_bam_compare_extend():
    """
    Test read extension

    Runs the subtract operation with "--extend 100" and 1:1 scale factors and
    checks the resulting bedgraph lines.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --extend 100 --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t100\t-1\n', '3R\t100\t150\t1\n', '3R\t150\t200\t-1\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_arguments():
    """
    Test minimal command line args for bamCompare. The ratio
    between the same file is taken, therefore, the expected value
    is 1.0 for all bins.
    """
    outfile = '/tmp/test_file.bg'
    # BAMFILE_B is deliberately passed as both inputs.
    args = "--bamfile1 {} --bamfile2 {} " \
           "-o {} -p 1 --outFileFormat bedgraph --ratio ratio".format(BAMFILE_B, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the handle returned by open() was never closed; the
    # context manager releases it before the file is unlinked.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t200\t1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files_skipnas():
    """
    Test skipnas
    Compared to the previous tests, any region that does not have coverage (in either of the bam files)
    is not included in the bedgraph file.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Previously the handle returned by open() was never closed; the
    # context manager releases it before the file is unlinked.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    # Only the 100-200 bins are expected; nothing is written for 0-100.
    expected = ['3R\t100\t150\t0.00\n', '3R\t150\t200\t-1.0\n']
    assert resp == expected, "{} != {}".format(resp, expected)
    unlink(outfile)
def test_bam_compare_extend():
    """
    Test read extension

    BAMFILE_A and BAMFILE_B are compared with the subtract operation while
    "--extend 100" is active; the bedgraph output is checked line by line.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --extend 100 --scaleFactors 1:1 --ratio subtract " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t100\t-1\n', '3R\t100\t150\t1\n', '3R\t150\t200\t-1\n']
    assert_equal(resp, expected)
    unlink(outfile)
# Example #11
# 0
def test_bam_compare_ZoverZ():
    """
    Ensure --skipZeroOverZero works in bamCompare

    The expected bedgraph contains no line for the 0-50 bin; only the three
    bins from 50 to 200 are written.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--skipZeroOverZero".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.584963\n'
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_diff_files_skipnas():
    """
    Test skipnas
    Compared to the previous tests, any region that does not have coverage (in either of the bam files)
    is not included in the bedgraph file.

    The check is repeated for the BAM pair and for the CRAM pair.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(A, B, outfile).split()
        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        # Only the 100-200 bins are expected; nothing is written for 0-100.
        expected = ['3R\t100\t150\t0\n', '3R\t150\t200\t-1\n']
        assert_equal(resp, expected)
        unlink(outfile)
# Example #13
# 0
def test_bam_compare_pseudocounts():
    """
    Test with different pseudocounts

    "--pseudocount 1 0" supplies two different values; the first expected
    bin is "inf" and the remaining bins are finite.
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \
           "--pseudocount 1 0".format(BAMFILE_A, BAMFILE_B, outfile).split()
    bam_comp.main(args)

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t0\t50\tinf\n', '3R\t50\t100\t0\n', '3R\t100\t150\t1\n',
        '3R\t150\t200\t0\n'
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_arguments():
    """
    Test minimal command line args for bamCompare. The ratio
    between the same file is taken, therefore, the expected value
    is 1 for all bins.

    The check is run once for BAMFILE_B and once for CRAMFILE_B.
    """
    outfile = '/tmp/test_file.bg'
    for fname in [BAMFILE_B, CRAMFILE_B]:
        # The same file is deliberately passed as both inputs.
        args = "--bamfile1 {} --bamfile2 {} " \
               "-o {} -p 1 --outFileFormat bedgraph --operation ratio".format(fname, fname, outfile).split()
        bam_comp.main(args)

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = ['3R\t0\t200\t1\n']
        assert_equal(resp, expected)
        unlink(outfile)
# Example #15
# 0
def test_bam_compare_diff_files_skipnas():
    """
    Test skipnas
    Compared to the previous tests, any region that does not have coverage (in either of the bam files)
    is not included in the bedgraph file.

    Both the BAM pair and the CRAM pair are exercised with identical
    expectations.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(A, B, outfile).split()
        bam_comp.main(args)

        # Read through a context manager so the handle is always released.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        # No lines are expected for the 0-100 range.
        expected = ['3R\t100\t150\t0\n', '3R\t150\t200\t-1\n']
        assert_equal(resp, expected)
        unlink(outfile)
# Example #16
# 0
def test_bam_compare_arguments():
    """
    Test minimal command line args for bamCompare. The ratio
    between the same file is taken, therefore, the expected value
    is 1 for all bins.

    Exercised for both BAMFILE_B and CRAMFILE_B.
    """
    outfile = '/tmp/test_file.bg'
    for fname in [BAMFILE_B, CRAMFILE_B]:
        # Comparing a file with itself: both inputs are the same path.
        args = "--bamfile1 {} --bamfile2 {} " \
               "-o {} -p 1 --outFileFormat bedgraph --operation ratio".format(fname, fname, outfile).split()
        bam_comp.main(args)

        # Read through a context manager so the handle is always released.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = ['3R\t0\t200\t1\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    subtract: scale factors [1,0.5], after applying normalize to 1x, coverage of test_A is 0.5, thus
    the factor to reach a coverage of 1 is x2. Thus, the final scale factors are [2,1]

    after applying factors:    0         -1              1              0

    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio subtract --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph --normalizeTo1x 200".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [ 1.   0.5] because BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t1\n',
        '3R\t150\t200\t0\n'
    ]
    assert_equal(resp, expected)
    unlink(outfile)
# Example #18
# 0
def test_bam_compare_diff_files():
    """
    Test with two different files

    Runs the subtract operation with 1:1 scale factors on the BAM pair and
    on the CRAM pair; both must yield the same bedgraph lines.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \
               "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split()
        bam_comp.main(args)

        # Read through a context manager so the handle is always released.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = [
            '3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t0\n',
            '3R\t150\t200\t-1\n'
        ]
        assert_equal(resp, expected)
        unlink(outfile)
# Example #19
# 0
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    subtract: After applying CPM normalization, the scale factors are [500000,250000]

    after applying factors:    0         -250k              250k              0

    The check is repeated for the BAM pair and for the CRAM pair.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --operation subtract --ignoreForNormalization chr_cigar " \
               "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM".format(A, B, outfile).split()
        bam_comp.main(args)

        # BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A,
        # so the factor applied to A is twice the one applied to B.

        # The context manager closes the handle even if reading raises.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = [
            '3R\t0\t50\t0\n', '3R\t50\t100\t-250000\n',
            '3R\t100\t150\t250000\n', '3R\t150\t200\t0\n'
        ]
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    subtract: scale factors [1,0.5], after applying normalize to 1x, coverage of test_A is 0.5, thus
    the factor to reach a coverage of 1 is x2. Thus, the final scale factors are [2,1]

    after applying factors:    0         -1              1              0

    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio subtract --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph --normalizeTo1x 200".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [ 1.   0.5] because BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-1\n', '3R\t100\t150\t1\n', '3R\t150\t200\t0\n']
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_scale_factors_ratio():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    ratio (scale factors [1,0.5]), per 50 bp bin:

        0-50:     (0+1)/(0*0.5+1) = 1
        50-100:   (0+1)/(1*0.5+1) = 0.67
        100-150:  (1+1)/(1*0.5+1) = 1.33
        150-200:  (1+1)/(2*0.5+1) = 1
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio ratio --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [ 1.   0.5] because BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A

    # The context manager closes the handle even if reading raises.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = [
        '3R\t0\t50\t1\n', '3R\t50\t100\t0.666667\n', '3R\t100\t150\t1.33333\n',
        '3R\t150\t200\t1\n'
    ]
    assert_equal(resp, expected)
    unlink(outfile)
def test_bam_compare_scale_factors_subtract():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    subtract: After applying CPM normalization, the scale factors are [500000,250000]

    after applying factors:    0         -250k              250k              0

    Both the BAM pair and the CRAM pair are exercised.
    """
    outfile = '/tmp/test_file.bg'
    for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]:
        args = "--bamfile1 {} --bamfile2 {} --operation subtract --ignoreForNormalization chr_cigar " \
               "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM".format(A, B, outfile).split()
        bam_comp.main(args)

        # BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A,
        # so the factor applied to A is twice the one applied to B.

        # Read through a context manager so the handle is always released.
        with open(outfile, 'r') as bedgraph:
            resp = bedgraph.readlines()

        expected = ['3R\t0\t50\t0\n', '3R\t50\t100\t-250000\n', '3R\t100\t150\t250000\n', '3R\t150\t200\t0\n']
        assert_equal(resp, expected)
        unlink(outfile)
def test_bam_compare_scale_factors_ratio():
    """
    Test scale factor

    The distribution of reads for the bam file is:

                  0                              100                           200
                  |------------------------------------------------------------|
    testA.bam  3R                                ==============>
                                                                <==============


    testB.bam  3R                 <==============               ==============>
                                                 ==============>
                                                                ==============>

    ------------------------------------------------------------------------------

    ratio (scale factors [1,0.5]), per 50 bp bin:

        0-50:     (0+1)/(0*0.5+1) = 1
        50-100:   (0+1)/(1*0.5+1) = 0.67
        100-150:  (1+1)/(1*0.5+1) = 1.33
        150-200:  (1+1)/(2*0.5+1) = 1
    """
    outfile = '/tmp/test_file.bg'
    args = "--bamfile1 {} --bamfile2 {} --ratio ratio --ignoreForNormalization chr_cigar " \
           "-o {} -p 1 --outFileFormat bedgraph".format(BAMFILE_A, BAMFILE_B, outfile).split()

    bam_comp.main(args)

    # The scale factors are [ 1.   0.5] because BAMFILE_B has double the amount of reads (4) compared to BAMFILE_A

    # Read through a context manager so the handle is always released.
    with open(outfile, 'r') as bedgraph:
        resp = bedgraph.readlines()

    expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.666667\n', '3R\t100\t150\t1.33333\n', '3R\t150\t200\t1\n']
    assert_equal(resp, expected)
    unlink(outfile)