def test_mate_pair_checker():
    """It tests the mate pair function."""

    def _match_files(fwd_fname, rev_fname):
        """Run match_pairs over two fastq files from TEST_DATA_DIR.

        Returns a (paired_text, orphan_text) tuple with everything that
        match_pairs wrote to the paired and to the orphan output handles.
        """
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_fname)
        rev_path = os.path.join(TEST_DATA_DIR, rev_fname)
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        # The input files stay open until match_pairs has returned (the
        # readers may well be lazy) and are closed right afterwards, so the
        # test no longer leaks file handles.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqrecords([fwd_fhand], 'fastq')
            rev_seqs = read_seqrecords([rev_fhand], 'fastq')
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # Both files hold the same reads except for the last ones.
    output, orp = _match_files('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # The first reads of the two files differ.
    output, orp = _match_files('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # pairend4 against pairend2: both seq8 mates are paired, while the
    # reverse reads of seq1 and seq2 end up as orphans.
    output, orp = _match_files('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
def test_pair_matcher(self):
    """It tests the pair matcher function."""

    def _data_path(fname):
        """Return the path of a file inside TEST_DATA_DIR."""
        return os.path.join(TEST_DATA_DIR, fname)

    def _match(fwd_seqs, rev_seqs):
        """Interleave both read streams and run match_pairs on them.

        Returns a (paired_text, orphan_text) tuple with everything that
        match_pairs wrote to the paired and to the orphan output handles.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # In every section the input files are opened in a with-block that
    # spans the match_pairs call: the readers may be lazy, so the handles
    # must outlive the matching, but they are closed as soon as it is done.

    # Both files hold the same reads except for the last ones.
    with open(_data_path('pairend1.sfastq')) as fwd_fhand, \
            open(_data_path('pairend2.sfastq')) as rev_fhand:
        output, orp = _match(read_seqs([fwd_fhand], file_format='fastq'),
                             read_seqs([rev_fhand], file_format='fastq'))
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # The first reads of the two files differ.
    with open(_data_path('pairend1.sfastq')) as fwd_fhand, \
            open(_data_path('pairend3.sfastq')) as rev_fhand:
        output, orp = _match(read_seqs([fwd_fhand], 'fastq'),
                             read_seqs([rev_fhand], 'fastq'))
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # pairend4 against pairend2: both seq8 mates are paired, while the
    # reverse reads of seq1 and seq2 end up as orphans.
    with open(_data_path('pairend4.sfastq')) as fwd_fhand, \
            open(_data_path('pairend2.sfastq')) as rev_fhand:
        output, orp = _match(read_seqs([fwd_fhand], 'fastq'),
                             read_seqs([rev_fhand], 'fastq'))
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # Some of the reads carry no direction information.
    with open(_data_path('pairend7.sfastq')) as fwd_fhand, \
            open(_data_path('pairend2.sfastq')) as rev_fhand:
        output, orp = _match(read_seqs([fwd_fhand], 'fastq'),
                             read_seqs([rev_fhand], 'fastq'))
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this assertion used to appear twice verbatim; one copy
    # was possibly meant to check the '1:' mate of seq1 -- confirm against
    # pairend7.sfastq before tightening it.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

    # File is not sorted: the s2.f read sits between the two s1 mates.
    # The records are spelled out with explicit newlines, one fastq field
    # per line.
    unsorted_fastq = ('@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                      '@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                      '@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n')
    seqs = read_seqs([StringIO(unsorted_fastq)], 'fastq')
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    # Only the call that is expected to raise lives inside the try body.
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
    except MalformedFile:
        pass
    else:
        self.fail('MalformedFile error expected')
def test_pair_matcher(self):
    """It tests the pair matcher function."""

    def _match_files(fwd_fname, rev_fname):
        """Run match_pairs over two fastq files from TEST_DATA_DIR.

        Returns a (paired_text, orphan_text) tuple with everything that
        match_pairs wrote to the paired and to the orphan output handles.
        """
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_fname)
        rev_path = os.path.join(TEST_DATA_DIR, rev_fname)
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        # The input files stay open until match_pairs has returned (the
        # readers may well be lazy) and are closed right afterwards, so the
        # test no longer leaks file handles.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqs([fwd_fhand])
            rev_seqs = read_seqs([rev_fhand])
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # Both files hold the same reads except for the last ones.
    output, orp = _match_files('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # The first reads of the two files differ.
    output, orp = _match_files('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # pairend4 against pairend2: both seq8 mates are paired, while the
    # reverse reads of seq1 and seq2 end up as orphans.
    output, orp = _match_files('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # Some of the reads carry no direction information.
    output, orp = _match_files('pairend7.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this assertion used to appear twice verbatim; one copy
    # was possibly meant to check the '1:' mate of seq1 -- confirm against
    # pairend7.sfastq before tightening it.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

    # File is not sorted: the s2.f read sits between the two s1 mates.
    # The records are spelled out with explicit newlines, one fastq field
    # per line.
    unsorted_fhand = StringIO('@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                              '@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                              '@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n')
    # The in-memory handle is tagged as fastq explicitly before reading.
    set_format(unsorted_fhand, 'fastq')
    seqs = read_seqs([unsorted_fhand])
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    # Only the call that is expected to raise lives inside the try body.
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq',
                    check_order_buffer_size=10)
    except ItemsNotSortedError:
        pass
    else:
        self.fail('ItemsNotSortedError error expected')
def test_pair_matcher(self):
    """It tests the pair matcher function."""

    def _data_path(fname):
        """Return the path of a file inside TEST_DATA_DIR."""
        return os.path.join(TEST_DATA_DIR, fname)

    def _match(fwd_seqs, rev_seqs):
        """Interleave both read streams and run match_pairs on them.

        Returns a (paired_text, orphan_text) tuple with everything that
        match_pairs wrote to the paired and to the orphan output handles.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, "fastq")
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # In every section the input files are opened in a with-block that
    # spans the match_pairs call: the readers may be lazy, so the handles
    # must outlive the matching, but they are closed as soon as it is done.

    # Both files hold the same reads except for the last ones.
    with open(_data_path("pairend1.sfastq")) as fwd_fhand, \
            open(_data_path("pairend2.sfastq")) as rev_fhand:
        output, orp = _match(
            read_seqs([fwd_fhand], file_format="fastq"),
            read_seqs([rev_fhand], file_format="fastq"),
        )
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # The first reads of the two files differ.
    with open(_data_path("pairend1.sfastq")) as fwd_fhand, \
            open(_data_path("pairend3.sfastq")) as rev_fhand:
        output, orp = _match(
            read_seqs([fwd_fhand], "fastq"),
            read_seqs([rev_fhand], "fastq"),
        )
    assert "@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in orp
    assert "@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
    assert "@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # pairend4 against pairend2: both seq8 mates are paired, while the
    # reverse reads of seq1 and seq2 end up as orphans.
    with open(_data_path("pairend4.sfastq")) as fwd_fhand, \
            open(_data_path("pairend2.sfastq")) as rev_fhand:
        output, orp = _match(
            read_seqs([fwd_fhand], "fastq"),
            read_seqs([rev_fhand], "fastq"),
        )
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # Some of the reads carry no direction information.
    with open(_data_path("pairend7.sfastq")) as fwd_fhand, \
            open(_data_path("pairend2.sfastq")) as rev_fhand:
        output, orp = _match(
            read_seqs([fwd_fhand], "fastq"),
            read_seqs([rev_fhand], "fastq"),
        )
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    # NOTE(review): this assertion used to appear twice verbatim; one copy
    # was possibly meant to check the "1:" mate of seq1 -- confirm against
    # pairend7.sfastq before tightening it.
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1" in orp
    assert "@seq7:136:FC706VJ:2:2104:15343:197393.hhhh" in orp
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC" in orp

    # File is not sorted: the s2.f read sits between the two s1 mates.
    # The records are spelled out with explicit newlines, one fastq field
    # per line.
    unsorted_fastq = (
        "@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
        "@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
        "@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
    )
    seqs = read_seqs([StringIO(unsorted_fastq)], "fastq")
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    # Only the call that is expected to raise lives inside the try body.
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, "fastq")
    except MalformedFile:
        pass
    else:
        self.fail("MalformedFile error expected")