def test_binaries(self):
    """It tests the interleave_pairs and deinterleave_pairs binaries.

    The test runs four scenarios:

    * both binaries answer ``-h`` with a usage message;
    * interleaving two pair files and deinterleaving the result gives
      back the original files;
    * deinterleaving with ``-Z`` gives the same content when the outputs
      are read back through BgzfReader;
    * interleaving files whose reads do not match fails with a non-zero
      exit code unless ``-s`` is given.
    """

    def read_text(fpath):
        # Close the handle explicitly instead of relying on garbage
        # collection to release it.
        with open(fpath) as fhand:
            return fhand.read()

    def read_bgzf(fpath, size=2000):
        # 2000 is the read size this test has always used; presumably it
        # is large enough to hold the whole fixture -- confirm if the
        # fixtures grow.
        with BgzfReader(fpath) as reader:
            return reader.read(size)

    interleave_bin = os.path.join(BIN_DIR, 'interleave_pairs')
    deinterleave_bin = os.path.join(BIN_DIR, 'deinterleave_pairs')
    assert 'usage' in check_output([interleave_bin, '-h'])
    assert 'usage' in check_output([deinterleave_bin, '-h'])

    # interleave two matching files
    in_fpath1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
    in_fpath2 = os.path.join(TEST_DATA_DIR, 'pairend1b.sfastq')
    out_fhand = NamedTemporaryFile()
    check_output([interleave_bin, '-o', out_fhand.name,
                  in_fpath1, in_fpath2])
    result = read_text(out_fhand.name)
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result

    # deinterleaving the interleaved file must give back the inputs
    out_fhand1 = NamedTemporaryFile()
    out_fhand2 = NamedTemporaryFile()
    check_output([deinterleave_bin, '-o', out_fhand1.name,
                  out_fhand2.name, out_fhand.name])
    assert read_text(in_fpath1) == read_text(out_fhand1.name)
    assert read_text(in_fpath2) == read_text(out_fhand2.name)

    # same round trip, with the -Z outputs read through BgzfReader
    out_fhand1 = NamedTemporaryFile()
    out_fhand2 = NamedTemporaryFile()
    check_output([deinterleave_bin, '-o', out_fhand1.name,
                  out_fhand2.name, out_fhand.name, '-Z'])
    assert read_text(in_fpath1) == read_bgzf(out_fhand1.name)
    assert read_text(in_fpath2) == read_bgzf(out_fhand2.name)

    # skip checks
    in_fpath1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
    in_fpath2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
    out_fhand = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    ret_code = call([interleave_bin, '-o', out_fhand.name,
                     in_fpath1, in_fpath2], stderr=stderr)
    # Without -s the binary must exit with a non-zero code and explain why.
    assert int(ret_code)
    assert 'from the two files do not match' in read_text(stderr.name)

    check_output([interleave_bin, '-o', out_fhand.name, '-s',
                  in_fpath1, in_fpath2])
    result = read_text(out_fhand.name)
    assert 'seq4:136:FC706VJ:2:2104:15343:197393' in result
    assert 'seq3:136:FC706VJ:2:2104:15343:197393' in result
def test_pair_matcher_bin(self):
    """It tests the pair_matcher binary.

    The test checks the ``-h`` usage message and then runs three
    scenarios: plain output, output written with ``-Z`` and read back
    through BgzfReader, and an unordered input file handled with ``-u``.
    In each scenario the matched reads must be in the ``-o`` file and the
    orphan read in the ``-p`` file.
    """

    def read_text(fpath):
        # Close the handle explicitly instead of relying on garbage
        # collection to release it.
        with open(fpath) as fhand:
            return fhand.read()

    def read_bgzf(fpath, size=2000):
        # 2000 is the read size this test has always used; presumably it
        # is large enough to hold the whole fixture -- confirm if the
        # fixtures grow.
        with BgzfReader(fpath) as reader:
            return reader.read(size)

    pair_matcher_bin = os.path.join(SEQ_BIN_DIR, 'pair_matcher')
    assert 'usage' in check_output([pair_matcher_bin, '-h'])

    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath])
    result = read_text(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_text(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # compressed output
    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath, '-Z'])
    result = read_bgzf(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_bgzf(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # unordered file
    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend6.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath, '-u'])
    result = read_text(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_text(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
def __init__(self, pbiFilename, chunk_size=10000000):
    """Read the index at *pbiFilename* and build its chunks.

    The path is user-expanded and made absolute, then stored on the
    instance. After ``_get_blocks()`` runs, the file is opened through
    BgzfReader in ``'rb'`` mode, the header is loaded, and the value
    returned by ``_loadMainIndex(..., zmw_only=True)`` is handed to
    ``_make_chunks`` together with *chunk_size*.
    """
    self._chunk_start = None
    expanded_path = expanduser(pbiFilename)
    self._pbiFilename = abspath(expanded_path)
    # Runs after the path is stored and before the index is read;
    # presumably it is what provides self._to_virtual_offset -- TODO confirm.
    self._get_blocks()
    with BgzfReader(self._pbiFilename, mode='rb') as pbi_handle:
        self._loadHeader(pbi_handle)
        hole_numbers = self._loadMainIndex(
            pbi_handle, self._to_virtual_offset, zmw_only=True)
        self._make_chunks(chunk_size, hole_numbers)
def __init__(self, pbiFilename, chunk_start=None, chunk_size=None,
             to_virtual_offset=None):
    """Load the header, main index and offsets from *pbiFilename*.

    *chunk_start* and *chunk_size* are stored on the instance before any
    loading happens. *to_virtual_offset* is passed through unchanged to
    ``_loadMainIndex``. The file is opened through BgzfReader in ``'rb'``
    mode after the path has been user-expanded and made absolute.
    """
    self._chunk_start = chunk_start
    self._chunk_size = chunk_size
    pbi_path = abspath(expanduser(pbiFilename))
    with BgzfReader(pbi_path, mode='rb') as pbi_handle:
        # The three loaders share one handle and run in this order.
        self._loadHeader(pbi_handle)
        self._loadMainIndex(pbi_handle, to_virtual_offset)
        self._loadOffsets(pbi_handle)
def test_pair_matcher_bin(self):
    """It tests the pair_matcher binary.

    The test checks the ``-h`` usage message and then runs four
    scenarios: plain output, a run with ``-l 1`` that must fail with an
    error message on stderr, output written with ``-Z`` and read back
    through BgzfReader, and an unordered input file handled with ``-u``.
    In the successful scenarios the matched reads must be in the ``-o``
    file and the orphan read in the ``-p`` file.
    """

    def read_text(fpath):
        # Close the handle explicitly instead of relying on garbage
        # collection to release it.
        with open(fpath) as fhand:
            return fhand.read()

    def read_bgzf(fpath, size=2000):
        # 2000 is the read size this test has always used; presumably it
        # is large enough to hold the whole fixture -- confirm if the
        # fixtures grow.
        with BgzfReader(fpath) as reader:
            return reader.read(size)

    pair_matcher_bin = os.path.join(BIN_DIR, 'pair_matcher')
    assert 'usage' in check_output([pair_matcher_bin, '-h'])

    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath])
    result = read_text(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_text(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with -l 1 the binary is expected to fail
    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    stderr = NamedTemporaryFile()
    try:
        check_output([pair_matcher_bin, '-o', out_fhand.name,
                      '-p', orphan_fhand.name, in_fpath, '-l', '1'],
                     stderr=stderr)
    except CalledProcessError:
        assert 'There are too many consecutive' in read_text(stderr.name)
    else:
        # A zero exit status means the expected error was not raised.
        self.fail('error expected')

    # compressed output
    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath, '-Z'])
    result = read_bgzf(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_bgzf(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # unordered file
    in_fpath = os.path.join(TEST_DATA_DIR, 'pairend6.sfastq')
    out_fhand = NamedTemporaryFile()
    orphan_fhand = NamedTemporaryFile()
    check_output([pair_matcher_bin, '-o', out_fhand.name,
                  '-p', orphan_fhand.name, in_fpath, '-u'])
    result = read_text(out_fhand.name)
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
    orp = read_text(orphan_fhand.name)
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
def __init__(self, pbiFilename):
    """Load only the header of the index file at *pbiFilename*.

    The path is user-expanded and made absolute, the file is opened
    through BgzfReader in ``'rb'`` mode, and the open handle is passed to
    ``_loadHeader``.
    """
    pbi_path = abspath(expanduser(pbiFilename))
    with BgzfReader(pbi_path, mode='rb') as pbi_handle:
        self._loadHeader(pbi_handle)