def test_all_orphan():
    """Unpaired reads must all be routed to the orphan output."""
    records = [SeqRecord(Seq('ACT'), id=name) for name in ('seq1', 'seq2')]
    seqs = list(assing_kind_to_seqs(SEQRECORD, records, None))

    # Default matching: the orphan output is compared exactly.
    paired_out, orphan_out = StringIO(), StringIO()
    match_pairs(seqs, paired_out, orphan_out, out_format='fasta')
    assert orphan_out.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

    # ordered=False: only the presence of each record is checked,
    # not the order in which the records were written.
    paired_out, orphan_out = StringIO(), StringIO()
    match_pairs(seqs, paired_out, orphan_out, ordered=False,
                out_format='fasta')
    orphans = orphan_out.getvalue()
    for expected in ('>seq1\nACT\n', '>seq2\nACT\n'):
        assert expected in orphans
def test_all_orphan():
    """All reads end up in the orphan output.

    The reads are first matched in memory with ``match_pairs`` and then
    written to a temporary FASTA file that is handed, by path, to
    ``match_pairs_unordered``.
    """
    seqs = [SeqRecord(Seq("ACT"), id="seq1"), SeqRecord(Seq("ACT"), id="seq2")]
    seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    match_pairs(seqs, out_fhand, orphan_out_fhand, out_format="fasta")
    assert orphan_out_fhand.getvalue() == ">seq1\nACT\n>seq2\nACT\n"

    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    # The context manager closes the temporary file even when the matcher
    # raises, so the handle is not leaked by a failing test.
    with NamedTemporaryFile(suffix=".fasta") as seq_fhand:
        write_seqs(seqs, seq_fhand, file_format="fasta")
        # Only the file name is passed on, so buffered data must be flushed
        # to disk first.
        seq_fhand.flush()
        match_pairs_unordered(seq_fhand.name, out_fhand, orphan_out_fhand,
                              out_format="fasta")

    # The output order is not checked for the unordered matcher.
    orphans = orphan_out_fhand.getvalue()
    assert ">seq1\nACT\n" in orphans
    assert ">seq2\nACT\n" in orphans
def test_all_orphan():
    """Every read is expected in the orphan output when nothing pairs up."""
    first = SeqRecord(Seq('ACT'), id='seq1')
    second = SeqRecord(Seq('ACT'), id='seq2')
    seqs = list(assing_kind_to_seqs(SEQRECORD, [first, second], None))

    # Default mode: exact orphan output expected.
    pairs_sink = StringIO()
    orphans_sink = StringIO()
    match_pairs(seqs, pairs_sink, orphans_sink, out_format='fasta')
    assert orphans_sink.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

    # ordered=False mode: both records must be present, in any order.
    pairs_sink = StringIO()
    orphans_sink = StringIO()
    match_pairs(seqs, pairs_sink, orphans_sink, ordered=False,
                out_format='fasta')
    written = orphans_sink.getvalue()
    assert '>seq1\nACT\n' in written
    assert '>seq2\nACT\n' in written
def test_all_orphan():
    """Check that two unrelated reads are both reported as orphans."""

    def _orphans(**match_kwargs):
        # Run match_pairs on fresh in-memory outputs and return the orphan
        # text; the paired output is not inspected by this test.
        paired_sink = StringIO()
        orphan_sink = StringIO()
        match_pairs(seqs, paired_sink, orphan_sink, **match_kwargs)
        return orphan_sink.getvalue()

    raw_records = [
        SeqRecord(Seq('ACT'), id='seq1'),
        SeqRecord(Seq('ACT'), id='seq2'),
    ]
    seqs = list(assing_kind_to_seqs(SEQRECORD, raw_records, None))

    # Default mode: the orphan output must match exactly.
    assert _orphans(out_format='fasta') == '>seq1\nACT\n>seq2\nACT\n'

    # ordered=False: each record must appear, whatever the order.
    unordered = _orphans(ordered=False, out_format='fasta')
    assert '>seq1\nACT\n' in unordered
    assert '>seq2\nACT\n' in unordered
def test_all_orphan():
    """All reads end up in the orphan output.

    Covers ``match_pairs`` on in-memory reads and ``match_pairs_unordered``
    on a temporary FASTA file that contains the same reads.
    """
    seqs = [
        SeqRecord(Seq('ACT'), id='seq1'),
        SeqRecord(Seq('ACT'), id='seq2'),
    ]
    seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    match_pairs(seqs, out_fhand, orphan_out_fhand, out_format='fasta')
    assert orphan_out_fhand.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    # Use a context manager so the temporary file is closed even if the
    # matcher or an earlier step raises.
    with NamedTemporaryFile(suffix='.fasta') as seq_fhand:
        write_seqs(seqs, seq_fhand, file_format='fasta')
        # The matcher is given the path, not the handle: flush buffered
        # writes before handing the name over.
        seq_fhand.flush()
        match_pairs_unordered(seq_fhand.name, out_fhand, orphan_out_fhand,
                              out_format='fasta')

    # Membership only: the unordered matcher's output order is not checked.
    orphans = orphan_out_fhand.getvalue()
    assert '>seq1\nACT\n' in orphans
    assert '>seq2\nACT\n' in orphans
def test_mate_pair_checker():
    """Test the mate pair function on pairs of FASTQ test files."""

    def _match(fwd_name, rev_name):
        """Interleave two test files and run ``match_pairs`` on them.

        Both names are resolved inside TEST_DATA_DIR. Returns a tuple with
        the paired output text and the orphan output text.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_name)
        rev_path = os.path.join(TEST_DATA_DIR, rev_name)
        # The input files stay open while match_pairs consumes the reads
        # and are closed afterwards, even if the matcher raises.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqrecords([fwd_fhand], 'fastq')
            rev_seqs = read_seqrecords([rev_fhand], 'fastq')
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = _match('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = _match('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with a different forward file (pairend4) against pairend2
    output, orp = _match('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
def test_pair_matcher(self):
    """Test the pair matcher function.

    Four scenarios interleave a forward and a reverse FASTQ file and check
    which reads land in the paired and the orphan outputs. The last one
    feeds an unsorted file and expects ``MalformedFile``.
    """

    def _match(fwd_name, rev_name, *reader_args, **reader_kwargs):
        """Interleave two test files and run ``match_pairs`` on them.

        Extra arguments are forwarded untouched to ``read_seqs``. Returns
        a tuple with the paired output text and the orphan output text.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_name)
        rev_path = os.path.join(TEST_DATA_DIR, rev_name)
        # The input files stay open while match_pairs consumes the reads
        # and are closed afterwards, even if the matcher raises.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqs([fwd_fhand], *reader_args, **reader_kwargs)
            rev_seqs = read_seqs([rev_fhand], *reader_args, **reader_kwargs)
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = _match('pairend1.sfastq', 'pairend2.sfastq',
                         file_format='fastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = _match('pairend1.sfastq', 'pairend3.sfastq', 'fastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with a different forward file (pairend4) against pairend2
    output, orp = _match('pairend4.sfastq', 'pairend2.sfastq', 'fastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with reads with no direction
    output, orp = _match('pairend7.sfastq', 'pairend2.sfastq', 'fastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this assertion was written twice; one copy was probably
    # meant to check the '1:Y:18' mate -- confirm against pairend7.sfastq.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

    # File is not sorted: s1.r shows up only after the s2.f record.
    file1 = StringIO(
        '@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
        '@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
        '@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
    )
    seqs = read_seqs([file1], 'fastq')
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
    except MalformedFile:
        pass
    else:
        self.fail('MalformedFile error expected')
def test_pair_matcher(self):
    """Test the pair matcher function.

    Four scenarios interleave a forward and a reverse FASTQ file and check
    which reads land in the paired and the orphan outputs. The last one
    feeds an unsorted file and expects ``ItemsNotSortedError``.
    """

    def _match(fwd_name, rev_name):
        """Interleave two test files and run ``match_pairs`` on them.

        Both names are resolved inside TEST_DATA_DIR. Returns a tuple with
        the paired output text and the orphan output text.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_name)
        rev_path = os.path.join(TEST_DATA_DIR, rev_name)
        # The input files stay open while match_pairs consumes the reads
        # and are closed afterwards, even if the matcher raises.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqs([fwd_fhand])
            rev_seqs = read_seqs([rev_fhand])
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = _match('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = _match('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with a different forward file (pairend4) against pairend2
    output, orp = _match('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with reads with no direction
    output, orp = _match('pairend7.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this assertion was written twice; one copy was probably
    # meant to check the '1:Y:18' mate -- confirm against pairend7.sfastq.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

    # File is not sorted: s1.r shows up only after the s2.f record.
    file1 = StringIO(
        '@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
        '@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
        '@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
    )
    # The in-memory file is tagged explicitly with its format before
    # read_seqs is called without a format argument.
    set_format(file1, 'fastq')
    seqs = read_seqs([file1])
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq',
                    check_order_buffer_size=10)
    except ItemsNotSortedError:
        pass
    else:
        self.fail('ItemsNotSortedError error expected')
def test_mate_pair_unorderer_checker():
    """Test the mate pair function with ``ordered=False``.

    Each scenario concatenates two FASTQ test files into one temporary
    file and checks which reads land in the paired and orphan outputs.
    """

    def _match(first_name, second_name):
        """Concatenate two test files and match them with ordered=False.

        Both names are resolved inside TEST_DATA_DIR. Returns a tuple with
        the paired output text and the orphan output text.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        # Context managers close the temporary file and both source files
        # even when the matcher raises.
        with NamedTemporaryFile() as fhand:
            for name in (first_name, second_name):
                with open(os.path.join(TEST_DATA_DIR, name)) as source:
                    fhand.write(source.read())
            fhand.flush()
            seqs = read_seqs([fhand])
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq',
                        ordered=False)
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = _match('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = _match('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with a different forward file (pairend4) against pairend2
    output, orp = _match('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # unordered file
    output, orp = _match('pairend1.sfastq', 'pairend2_unordered.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with reads with no direction
    output, orp = _match('pairend7.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this assertion was written twice; one copy was probably
    # meant to check the '1:Y:18' mate -- confirm against pairend7.sfastq.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp
def test_pair_matcher(self):
    """Test the pair matcher function.

    Four scenarios interleave a forward and a reverse FASTQ file and check
    which reads land in the paired and the orphan outputs. The last one
    feeds an unsorted file and expects ``MalformedFile``.
    """

    def _match(fwd_name, rev_name, *reader_args, **reader_kwargs):
        """Interleave two test files and run ``match_pairs`` on them.

        Extra arguments are forwarded untouched to ``read_seqs``. Returns
        a tuple with the paired output text and the orphan output text.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_name)
        rev_path = os.path.join(TEST_DATA_DIR, rev_name)
        # The input files stay open while match_pairs consumes the reads
        # and are closed afterwards, even if the matcher raises.
        with open(fwd_path) as fwd_fhand, open(rev_path) as rev_fhand:
            fwd_seqs = read_seqs([fwd_fhand], *reader_args, **reader_kwargs)
            rev_seqs = read_seqs([rev_fhand], *reader_args, **reader_kwargs)
            seqs = flat_zip_longest(fwd_seqs, rev_seqs)
            match_pairs(seqs, out_fhand, orphan_out_fhand, "fastq")
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = _match("pairend1.sfastq", "pairend2.sfastq",
                         file_format="fastq")
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # with the first seqs different
    output, orp = _match("pairend1.sfastq", "pairend3.sfastq", "fastq")
    assert "@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in orp
    assert "@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
    assert "@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # with a different forward file (pairend4) against pairend2
    output, orp = _match("pairend4.sfastq", "pairend2.sfastq", "fastq")
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

    # with reads with no direction
    output, orp = _match("pairend7.sfastq", "pairend2.sfastq", "fastq")
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
    assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    # NOTE(review): this assertion was written twice; one copy was probably
    # meant to check the "1:Y:18" mate -- confirm against pairend7.sfastq.
    assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
    assert "@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1" in orp
    assert "@seq7:136:FC706VJ:2:2104:15343:197393.hhhh" in orp
    assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC" in orp

    # File is not sorted: s1.r shows up only after the s2.f record.
    file1 = StringIO(
        "@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
        "@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
        "@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n"
    )
    seqs = read_seqs([file1], "fastq")
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, "fastq")
    except MalformedFile:
        pass
    else:
        self.fail("MalformedFile error expected")