def test_all_orphan():
    """All reads end up in orphan.

    Two records with different ids ('seq1' and 'seq2') are run through
    match_pairs, first with the default options and then with
    ordered=False. In both runs each record must be written to the
    orphan handle.

    This test used to be defined twice with identical bodies, so the
    second definition silently replaced the first one; only a single
    definition is kept.
    """
    seqs = [SeqRecord(Seq('ACT'), id='seq1'),
            SeqRecord(Seq('ACT'), id='seq2')]
    # Materialise the wrapped sequences so they can be reused by both runs.
    seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

    # Default options: the orphan output is checked exactly, order included.
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    match_pairs(seqs, out_fhand, orphan_out_fhand, out_format='fasta')
    assert orphan_out_fhand.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

    # ordered=False: only the presence of each record is checked, not the
    # order in which the orphans were written.
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    match_pairs(seqs, out_fhand, orphan_out_fhand, ordered=False,
                out_format='fasta')
    orphans = orphan_out_fhand.getvalue()
    assert '>seq1\nACT\n' in orphans
    assert '>seq2\nACT\n' in orphans
def test_pair_matcher(self):
    """It tests the pair matcher function.

    Four scenarios interleave a forward and a reverse test-data file with
    flat_zip_longest and check which read titles end up in the paired
    output and which in the orphan output. A last scenario feeds an
    in-memory FASTQ whose mates are not adjacent and expects
    ItemsNotSortedError.
    """

    def match_files(fwd_name, rev_name):
        """Match the pairs of two test-data files.

        Returns a (paired_text, orphan_text) tuple with the contents
        written by match_pairs to each output handle.
        """
        fwd_path = os.path.join(TEST_DATA_DIR, fwd_name)
        rev_path = os.path.join(TEST_DATA_DIR, rev_name)
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        # The input files are kept open until match_pairs returns because
        # read_seqs may read them lazily (assumption -- its implementation
        # is not visible here); afterwards they are closed instead of
        # being leaked.
        with open(fwd_path) as fwd_fhand:
            with open(rev_path) as rev_fhand:
                fwd_seqs = read_seqs([fwd_fhand])
                rev_seqs = read_seqs([rev_fhand])
                seqs = flat_zip_longest(fwd_seqs, rev_seqs)
                match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq')
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = match_files('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = match_files('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    output, orp = match_files('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with reads with no direction
    output, orp = match_files('pairend7.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this exact assertion used to appear twice in a row;
    # possibly one of them was meant to check the '1:Y' mate -- confirm
    # against pairend7.sfastq before adding it.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

    # File is not sorted: s1.r does not come right after s1.f
    unsorted_fastq = ('@s1.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                      '@s2.f\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n'
                      '@s1.r\nAACCAGTCAAC\n+\nCCCFFFFFGHH\n')
    unsorted_fhand = StringIO(unsorted_fastq)
    # An in-memory handle is given its format explicitly before reading.
    set_format(unsorted_fhand, 'fastq')
    seqs = read_seqs([unsorted_fhand])
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    try:
        match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq',
                    check_order_buffer_size=10)
    except ItemsNotSortedError:
        pass
    else:
        # Only reached when match_pairs did not raise.
        self.fail('ItemsNotSortedError error expected')
def test_mate_pair_unorderer_checker():
    """It tests the pair matching with ordered=False.

    In every scenario two test-data files are concatenated, one after
    the other, into a temporary file; the sequences read from it are
    passed to match_pairs with ordered=False and the titles found in the
    paired and orphan outputs are checked.
    """

    def match_concatenated(first_name, second_name):
        """Match the pairs found in the concatenation of two data files.

        Returns a (paired_text, orphan_text) tuple with the contents
        written by match_pairs to each output handle.
        """
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        # The temporary file is closed (and therefore removed) once
        # match_pairs has returned; the source files are closed right
        # after being copied instead of being left to the garbage
        # collector.
        with NamedTemporaryFile() as fhand:
            for name in (first_name, second_name):
                with open(os.path.join(TEST_DATA_DIR, name)) as in_fhand:
                    fhand.write(in_fhand.read())
            # Make sure the content is on disk before it is read back.
            fhand.flush()
            seqs = read_seqs([fhand])
            match_pairs(seqs, out_fhand, orphan_out_fhand, 'fastq',
                        ordered=False)
        return out_fhand.getvalue(), orphan_out_fhand.getvalue()

    # with equal seqs but the last ones
    output, orp = match_concatenated('pairend1.sfastq', 'pairend2.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with the first seqs different
    output, orp = match_concatenated('pairend1.sfastq', 'pairend3.sfastq')
    assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
    assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    output, orp = match_concatenated('pairend4.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # unordered file
    output, orp = match_concatenated('pairend1.sfastq',
                                     'pairend2_unordered.sfastq')
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

    # with reads with no direction
    output, orp = match_concatenated('pairend7.sfastq', 'pairend2.sfastq')
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
    assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    # NOTE(review): this exact assertion used to appear twice in a row;
    # possibly one of them was meant to check the '1:Y' mate -- confirm
    # against pairend7.sfastq before adding it.
    assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
    assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
    assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
    assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp