Example #1
0
    def test_all_orphan():
        """Reads whose ids do not pair up must all land in the orphan output."""
        records = [SeqRecord(Seq('ACT'), id=name) for name in ('seq1', 'seq2')]
        seqs = list(assing_kind_to_seqs(SEQRECORD, records, None))

        # Default call: the orphan output must match the input order exactly.
        paired_out, orphan_out = StringIO(), StringIO()
        match_pairs(seqs, paired_out, orphan_out, out_format='fasta')
        assert orphan_out.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # ordered=False: only the presence of each record is checked.
        paired_out, orphan_out = StringIO(), StringIO()
        match_pairs(seqs, paired_out, orphan_out, ordered=False,
                    out_format='fasta')
        orphans = orphan_out.getvalue()
        assert '>seq1\nACT\n' in orphans
        assert '>seq2\nACT\n' in orphans
    def test_all_orphan():
        """Reads whose ids do not pair up must all land in the orphan output.

        The check is done twice: once through match_pairs with in-memory
        records and once through match_pairs_unordered, which is given the
        path of a fasta file holding the same records.
        """
        seqs = [SeqRecord(Seq("ACT"), id="seq1"), SeqRecord(Seq("ACT"), id="seq2")]
        seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format="fasta")
        assert orphan_out_fhand.getvalue() == ">seq1\nACT\n>seq2\nACT\n"

        # The context manager closes (and thereby removes) the temporary file
        # even if the matcher or an assertion fails.
        with NamedTemporaryFile(suffix=".fasta") as seq_fhand:
            write_seqs(seqs, seq_fhand, file_format="fasta")
            # Flush so the data is on disk before the file is read by name.
            seq_fhand.flush()
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            match_pairs_unordered(
                seq_fhand.name, out_fhand, orphan_out_fhand, out_format="fasta"
            )
        orphans = orphan_out_fhand.getvalue()
        assert ">seq1\nACT\n" in orphans
        assert ">seq2\nACT\n" in orphans
Example #3
0
    def test_all_orphan():
        """Reads whose ids do not pair up must all land in the orphan output."""
        records = [SeqRecord(Seq('ACT'), id=name) for name in ('seq1', 'seq2')]
        seqs = list(assing_kind_to_seqs(SEQRECORD, records, None))

        # Default call: the orphan output must match the input order exactly.
        paired_out, orphan_out = StringIO(), StringIO()
        match_pairs(seqs, paired_out, orphan_out, out_format='fasta')
        assert orphan_out.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # ordered=False works on the in-memory records directly, so no
        # temporary fasta file is needed here.
        paired_out, orphan_out = StringIO(), StringIO()
        match_pairs(seqs, paired_out, orphan_out, ordered=False,
                    out_format='fasta')
        orphans = orphan_out.getvalue()
        assert '>seq1\nACT\n' in orphans
        assert '>seq2\nACT\n' in orphans
Example #4
0
    def test_all_orphan():
        """Reads whose ids do not pair up must all land in the orphan output."""
        first = SeqRecord(Seq('ACT'), id='seq1')
        second = SeqRecord(Seq('ACT'), id='seq2')
        seqs = list(assing_kind_to_seqs(SEQRECORD, [first, second], None))

        # Default call: the orphan output must match the input order exactly.
        paired_out = StringIO()
        orphan_out = StringIO()
        match_pairs(seqs, paired_out, orphan_out, out_format='fasta')
        assert orphan_out.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # ordered=False: only the presence of each record is checked.
        paired_out = StringIO()
        orphan_out = StringIO()
        match_pairs(seqs, paired_out, orphan_out, ordered=False,
                    out_format='fasta')
        orphan_text = orphan_out.getvalue()
        for expected in ('>seq1\nACT\n', '>seq2\nACT\n'):
            assert expected in orphan_text
    def test_all_orphan():
        """Reads whose ids do not pair up must all land in the orphan output.

        The check is done twice: once through match_pairs with in-memory
        records and once through match_pairs_unordered, which is given the
        path of a fasta file holding the same records.
        """
        seqs = [
            SeqRecord(Seq('ACT'), id='seq1'),
            SeqRecord(Seq('ACT'), id='seq2')
        ]
        seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format='fasta')
        assert orphan_out_fhand.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # The context manager closes (and thereby removes) the temporary file
        # even if the matcher or an assertion fails.
        with NamedTemporaryFile(suffix='.fasta') as seq_fhand:
            write_seqs(seqs, seq_fhand, file_format='fasta')
            # Flush so the data is on disk before the file is read by name.
            seq_fhand.flush()
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            match_pairs_unordered(seq_fhand.name,
                                  out_fhand,
                                  orphan_out_fhand,
                                  out_format='fasta')
        orphans = orphan_out_fhand.getvalue()
        assert '>seq1\nACT\n' in orphans
        assert '>seq2\nACT\n' in orphans
    def test_mate_pair_checker():
        """Check match_pairs on interleaved forward and reverse fastq reads.

        Each scenario names a forward and a reverse file from TEST_DATA_DIR
        plus the read headers expected in the paired output and in the orphan
        output after the two files are interleaved with flat_zip_longest.
        """
        out_format = 'fastq'
        scenarios = [
            # with equal seqs but the last ones
            ('pairend1.sfastq', 'pairend2.sfastq',
             ['@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            # with the first seqs different
            ('pairend1.sfastq', 'pairend3.sfastq',
             ['@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
              '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            ('pairend4.sfastq', 'pairend2.sfastq',
             ['@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
        ]

        for fwd_fname, rev_fname, paired_headers, orphan_headers in scenarios:
            fwd_fpath = os.path.join(TEST_DATA_DIR, fwd_fname)
            rev_fpath = os.path.join(TEST_DATA_DIR, rev_fname)
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            # Keep the inputs open only while match_pairs consumes them, so
            # no file handle outlives its scenario.
            with open(fwd_fpath) as fwd_fhand, open(rev_fpath) as rev_fhand:
                fwd_seqs = read_seqrecords([fwd_fhand], 'fastq')
                rev_seqs = read_seqrecords([rev_fhand], 'fastq')
                seqs = flat_zip_longest(fwd_seqs, rev_seqs)
                match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

            output = out_fhand.getvalue()
            for header in paired_headers:
                assert header in output
            orp = orphan_out_fhand.getvalue()
            for header in orphan_headers:
                assert header in orp
    def test_pair_matcher(self):
        'It test the pair matcher function'
        # with equal seqs but the last ones
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)], file_format='fastq')
        rev_seqs = read_seqs([open(file2)], file_format='fastq')

        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with the firsts seqs different
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend3.sfastq')
        fwd_seqs = read_seqs([open(file1)], 'fastq')
        rev_seqs = read_seqs([open(file2)], 'fastq')
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
        assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        file1 = os.path.join(TEST_DATA_DIR, 'pairend4.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)], 'fastq')
        rev_seqs = read_seqs([open(file2)], 'fastq')
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with reads with no direcction
        file1 = os.path.join(TEST_DATA_DIR, 'pairend7.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)], 'fastq')
        rev_seqs = read_seqs([open(file2)], 'fastq')
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)
        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output

        orp = orphan_out_fhand.getvalue()
        assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
        assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

        # File is not sorted
        file1 = '''@s1.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s2.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s1.r
AACCAGTCAAC
+
CCCFFFFFGHH
'''
        file1 = StringIO(file1)
        seqs = read_seqs([file1], 'fastq')
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        try:
            match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)
            output = out_fhand.getvalue()
            self.fail('MalformedFile error expected')
        except MalformedFile:
            pass
Example #8
0
    def test_pair_matcher(self):
        'It test the pair matcher function'
        # with equal seqs but the last ones
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)])
        rev_seqs = read_seqs([open(file2)])

        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with the firsts seqs different
        file1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend3.sfastq')
        fwd_seqs = read_seqs([open(file1)])
        rev_seqs = read_seqs([open(file2)])
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
        assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        file1 = os.path.join(TEST_DATA_DIR, 'pairend4.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)])
        rev_seqs = read_seqs([open(file2)])
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        orp = orphan_out_fhand.getvalue()
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with reads with no direcction
        file1 = os.path.join(TEST_DATA_DIR, 'pairend7.sfastq')
        file2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        fwd_seqs = read_seqs([open(file1)])
        rev_seqs = read_seqs([open(file2)])
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)
        output = out_fhand.getvalue()
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output

        orp = orphan_out_fhand.getvalue()
        assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
        assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp

        # File is not sorted
        file1 = '''@s1.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s2.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s1.r
AACCAGTCAAC
+
CCCFFFFFGHH
'''
        file1 = StringIO(file1)
        set_format(file1, 'fastq')
        seqs = read_seqs([file1])
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = 'fastq'

        try:
            match_pairs(seqs, out_fhand, orphan_out_fhand, out_format,
                        check_order_buffer_size=10)
            output = out_fhand.getvalue()
            self.fail('ItemsNotSortedError error expected')
        except ItemsNotSortedError:
            pass
Example #9
0
    def test_mate_pair_unorderer_checker():
        """Check match_pairs with ordered=False on concatenated fastq files.

        For each scenario two files from TEST_DATA_DIR are written one after
        the other into a temporary file, which is read with read_seqs and
        passed to match_pairs(..., ordered=False). The listed read headers
        must then appear in the paired output and in the orphan output.
        """
        out_format = 'fastq'
        scenarios = [
            # with equal seqs but the last ones
            ('pairend1.sfastq', 'pairend2.sfastq',
             ['@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            # with the first seqs different
            ('pairend1.sfastq', 'pairend3.sfastq',
             ['@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
              '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            ('pairend4.sfastq', 'pairend2.sfastq',
             ['@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            # unordered file
            ('pairend1.sfastq', 'pairend2_unordered.sfastq',
             ['@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG']),
            # with reads with no direction
            # NOTE(review): the '@seq1 ... 2:Y' paired assertion used to be
            # written twice; the second copy may have been meant for a
            # different header - confirm.
            ('pairend7.sfastq', 'pairend2.sfastq',
             ['@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
              '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
              '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'],
             ['@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1',
              '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh',
              '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC']),
        ]

        for fname1, fname2, paired_headers, orphan_headers in scenarios:
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            # The temporary file is closed as soon as match_pairs is done,
            # instead of lingering until the name is rebound.
            with NamedTemporaryFile() as fhand:
                for fname in (fname1, fname2):
                    with open(os.path.join(TEST_DATA_DIR, fname)) as src:
                        fhand.write(src.read())
                fhand.flush()
                seqs = read_seqs([fhand])
                match_pairs(seqs, out_fhand, orphan_out_fhand, out_format,
                            ordered=False)

            output = out_fhand.getvalue()
            for header in paired_headers:
                assert header in output
            orp = orphan_out_fhand.getvalue()
            for header in orphan_headers:
                assert header in orp
    def test_pair_matcher(self):
        "It test the pair matcher function"
        # with equal seqs but the last ones
        file1 = os.path.join(TEST_DATA_DIR, "pairend1.sfastq")
        file2 = os.path.join(TEST_DATA_DIR, "pairend2.sfastq")
        fwd_seqs = read_seqs([open(file1)], file_format="fastq")
        rev_seqs = read_seqs([open(file2)], file_format="fastq")

        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = "fastq"
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        orp = orphan_out_fhand.getvalue()
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        # with the firsts seqs different
        file1 = os.path.join(TEST_DATA_DIR, "pairend1.sfastq")
        file2 = os.path.join(TEST_DATA_DIR, "pairend3.sfastq")
        fwd_seqs = read_seqs([open(file1)], "fastq")
        rev_seqs = read_seqs([open(file2)], "fastq")
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = "fastq"
        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert "@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        orp = orphan_out_fhand.getvalue()
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in orp
        assert "@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
        assert "@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        file1 = os.path.join(TEST_DATA_DIR, "pairend4.sfastq")
        file2 = os.path.join(TEST_DATA_DIR, "pairend2.sfastq")
        fwd_seqs = read_seqs([open(file1)], "fastq")
        rev_seqs = read_seqs([open(file2)], "fastq")
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = "fastq"

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)

        output = out_fhand.getvalue()
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        orp = orphan_out_fhand.getvalue()
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        # with reads with no direcction
        file1 = os.path.join(TEST_DATA_DIR, "pairend7.sfastq")
        file2 = os.path.join(TEST_DATA_DIR, "pairend2.sfastq")
        fwd_seqs = read_seqs([open(file1)], "fastq")
        rev_seqs = read_seqs([open(file2)], "fastq")
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = "fastq"

        seqs = flat_zip_longest(fwd_seqs, rev_seqs)
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)
        output = out_fhand.getvalue()
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output

        orp = orphan_out_fhand.getvalue()
        assert "@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1" in orp
        assert "@seq7:136:FC706VJ:2:2104:15343:197393.hhhh" in orp
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC" in orp

        # File is not sorted
        file1 = """@s1.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s2.f
AACCAGTCAAC
+
CCCFFFFFGHH
@s1.r
AACCAGTCAAC
+
CCCFFFFFGHH
"""
        file1 = StringIO(file1)
        seqs = read_seqs([file1], "fastq")
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        out_format = "fastq"

        try:
            match_pairs(seqs, out_fhand, orphan_out_fhand, out_format)
            output = out_fhand.getvalue()
            self.fail("MalformedFile error expected")
        except MalformedFile:
            pass
Example #11
0
    def test_mate_pair_unorderer_checker():
        """Check match_pairs with ordered=False on concatenated fastq files.

        For each scenario two files from TEST_DATA_DIR are written one after
        the other into a temporary file, which is read with read_seqs and
        passed to match_pairs(..., ordered=False). The listed read headers
        must then appear in the paired output and in the orphan output.
        """
        out_format = 'fastq'
        scenarios = [
            # with equal seqs but the last ones
            ('pairend1.sfastq', 'pairend2.sfastq', [
                '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ], [
                '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ]),
            # with the first seqs different
            ('pairend1.sfastq', 'pairend3.sfastq', [
                '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ], [
                '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
                '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ]),
            ('pairend4.sfastq', 'pairend2.sfastq', [
                '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ], [
                '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
                '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ]),
            # unordered file
            ('pairend1.sfastq', 'pairend2_unordered.sfastq', [
                '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ], [
                '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ]),
            # with reads with no direction
            # NOTE(review): the '@seq1 ... 2:Y' paired assertion used to be
            # written twice; the second copy may have been meant for a
            # different header - confirm.
            ('pairend7.sfastq', 'pairend2.sfastq', [
                '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
                '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
                '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG',
            ], [
                '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1',
                '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh',
                '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC',
            ]),
        ]

        for fname1, fname2, paired_headers, orphan_headers in scenarios:
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            # The temporary file is closed as soon as match_pairs is done,
            # instead of lingering until the name is rebound.
            with NamedTemporaryFile() as fhand:
                for fname in (fname1, fname2):
                    with open(os.path.join(TEST_DATA_DIR, fname)) as src:
                        fhand.write(src.read())
                fhand.flush()
                seqs = read_seqs([fhand])
                match_pairs(seqs,
                            out_fhand,
                            orphan_out_fhand,
                            out_format,
                            ordered=False)

            output = out_fhand.getvalue()
            for header in paired_headers:
                assert header in output
            orp = orphan_out_fhand.getvalue()
            for header in orphan_headers:
                assert header in orp