def test_all_orphan():
    """Check that every read ends up in the orphan output.

    Two reads with ids "seq1" and "seq2" are run through match_pairs
    (from an in-memory list) and through match_pairs_unordered (from a
    temporary fasta file). In both cases the reads are expected in the
    orphan handle.
    """
    seqs = [SeqRecord(Seq("ACT"), id="seq1"), SeqRecord(Seq("ACT"), id="seq2")]
    seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

    # In-memory matcher: the exact orphan output, in input order, is checked.
    out_fhand = StringIO()
    orphan_out_fhand = StringIO()
    match_pairs(seqs, out_fhand, orphan_out_fhand, out_format="fasta")
    assert orphan_out_fhand.getvalue() == ">seq1\nACT\n>seq2\nACT\n"

    # The unordered matcher takes a file path, so the reads are written to a
    # temporary file. The context manager closes it deterministically instead
    # of leaving the cleanup to garbage collection.
    with NamedTemporaryFile(suffix=".fasta") as seq_fhand:
        write_seqs(seqs, seq_fhand, file_format="fasta")
        seq_fhand.flush()  # make the data visible through seq_fhand.name
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        match_pairs_unordered(
            seq_fhand.name, out_fhand, orphan_out_fhand, out_format="fasta"
        )

    # Only membership is checked here, not the order of the orphans.
    orphans = orphan_out_fhand.getvalue()
    assert ">seq1\nACT\n" in orphans
    assert ">seq2\nACT\n" in orphans
Beispiel #2
0
    def test_all_orphan():
        """Check that every read ends up in the orphan output.

        Two reads with ids 'seq1' and 'seq2' are run through match_pairs
        (from an in-memory list) and through match_pairs_unordered (from a
        temporary fasta file). In both cases the reads are expected in the
        orphan handle.
        """
        seqs = [SeqRecord(Seq('ACT'), id='seq1'),
                SeqRecord(Seq('ACT'), id='seq2')]
        seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

        # In-memory matcher: the exact orphan output, in input order, is
        # checked.
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format='fasta')
        assert orphan_out_fhand.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # The unordered matcher takes a file path, so the reads are written
        # to a temporary file. The context manager closes it deterministically
        # instead of leaving the cleanup to garbage collection.
        with NamedTemporaryFile(suffix='.fasta') as seq_fhand:
            write_seqs(seqs, seq_fhand, file_format='fasta')
            seq_fhand.flush()  # make the data visible through seq_fhand.name
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            match_pairs_unordered(seq_fhand.name, out_fhand, orphan_out_fhand,
                                  out_format='fasta')

        # Only membership is checked here, not the order of the orphans.
        orphans = orphan_out_fhand.getvalue()
        assert '>seq1\nACT\n' in orphans
        assert '>seq2\nACT\n' in orphans
    def test_all_orphan():
        """Check that every read ends up in the orphan output.

        Two reads with ids 'seq1' and 'seq2' are run through match_pairs
        (from an in-memory list) and through match_pairs_unordered (from a
        temporary fasta file). In both cases the reads are expected in the
        orphan handle.
        """
        seqs = [
            SeqRecord(Seq('ACT'), id='seq1'),
            SeqRecord(Seq('ACT'), id='seq2'),
        ]
        seqs = list(assing_kind_to_seqs(SEQRECORD, seqs, None))

        # In-memory matcher: the exact orphan output, in input order, is
        # checked.
        out_fhand = StringIO()
        orphan_out_fhand = StringIO()
        match_pairs(seqs, out_fhand, orphan_out_fhand, out_format='fasta')
        assert orphan_out_fhand.getvalue() == '>seq1\nACT\n>seq2\nACT\n'

        # The unordered matcher takes a file path, so the reads are written
        # to a temporary file. The context manager closes it deterministically
        # instead of leaving the cleanup to garbage collection.
        with NamedTemporaryFile(suffix='.fasta') as seq_fhand:
            write_seqs(seqs, seq_fhand, file_format='fasta')
            seq_fhand.flush()  # make the data visible through seq_fhand.name
            out_fhand = StringIO()
            orphan_out_fhand = StringIO()
            match_pairs_unordered(seq_fhand.name,
                                  out_fhand,
                                  orphan_out_fhand,
                                  out_format='fasta')

        # Only membership is checked here, not the order of the orphans.
        orphans = orphan_out_fhand.getvalue()
        assert '>seq1\nACT\n' in orphans
        assert '>seq2\nACT\n' in orphans
    def test_mate_pair_unorderer_checker():
        """Test match_pairs_unordered on concatenated paired-end fastq files.

        Each case joins two files from TEST_DATA_DIR into one temporary file,
        runs match_pairs_unordered on it with fastq output and checks which
        read titles appear in the paired output and which in the orphan
        output.
        """

        def _match_concatenated(fname1, fname2):
            """Return (paired_text, orphan_text) for the two joined files."""
            # Context managers close every input handle and the temporary
            # file deterministically instead of relying on garbage collection.
            with NamedTemporaryFile() as fhand:
                for fname in (fname1, fname2):
                    with open(os.path.join(TEST_DATA_DIR, fname)) as in_fhand:
                        fhand.write(in_fhand.read())
                fhand.flush()  # make the data visible through fhand.name

                out_fhand = StringIO()
                orphan_out_fhand = StringIO()
                match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand,
                                      'fastq')
            return out_fhand.getvalue(), orphan_out_fhand.getvalue()

        # with equal seqs but the last ones
        output, orp = _match_concatenated('pairend1.sfastq', 'pairend2.sfastq')
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with the first seqs different
        output, orp = _match_concatenated('pairend1.sfastq', 'pairend3.sfastq')
        assert '@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in orp
        assert '@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        output, orp = _match_concatenated('pairend4.sfastq', 'pairend2.sfastq')
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # unordered file
        output, orp = _match_concatenated('pairend1.sfastq',
                                          'pairend2_unordered.sfastq')
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with reads with no direction
        output, orp = _match_concatenated('pairend7.sfastq', 'pairend2.sfastq')
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in output
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output
        # NOTE(review): this assertion used to appear twice, word for word.
        # Possibly one copy was meant to check the '1:' mate of seq1 --
        # confirm against pairend7.sfastq before changing it.
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in output

        assert '@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1' in orp
        assert '@seq7:136:FC706VJ:2:2104:15343:197393.hhhh' in orp
        assert '@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC' in orp
    def test_mate_pair_unorderer_checker():
        """Test match_pairs_unordered on concatenated paired-end fastq files.

        Each case joins two files from TEST_DATA_DIR into one temporary file,
        runs match_pairs_unordered on it with fastq output and checks which
        read titles appear in the paired output and which in the orphan
        output.
        """

        def _match_concatenated(fname1, fname2):
            """Return (paired_text, orphan_text) for the two joined files."""
            # Context managers close every input handle and the temporary
            # file deterministically instead of relying on garbage collection.
            with NamedTemporaryFile() as fhand:
                for fname in (fname1, fname2):
                    with open(os.path.join(TEST_DATA_DIR, fname)) as in_fhand:
                        fhand.write(in_fhand.read())
                fhand.flush()  # make the data visible through fhand.name

                out_fhand = StringIO()
                orphan_out_fhand = StringIO()
                match_pairs_unordered(fhand.name, out_fhand, orphan_out_fhand, "fastq")
            return out_fhand.getvalue(), orphan_out_fhand.getvalue()

        # with equal seqs but the last ones
        output, orp = _match_concatenated("pairend1.sfastq", "pairend2.sfastq")
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        # with the first seqs different
        output, orp = _match_concatenated("pairend1.sfastq", "pairend3.sfastq")
        assert "@seq4:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in orp
        assert "@seq3:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
        assert "@seq6:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        output, orp = _match_concatenated("pairend4.sfastq", "pairend2.sfastq")
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        # unordered file
        output, orp = _match_concatenated(
            "pairend1.sfastq", "pairend2_unordered.sfastq"
        )
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in orp

        # with reads with no direction
        output, orp = _match_concatenated("pairend7.sfastq", "pairend2.sfastq")
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG" in output
        assert "@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output
        # NOTE(review): this assertion used to appear twice, word for word.
        # Possibly one copy was meant to check the "1:" mate of seq1 --
        # confirm against pairend7.sfastq before changing it.
        assert "@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG" in output

        assert "@seq6:136:FC706VJ:2:2104:15343:197393.mpl_1" in orp
        assert "@seq7:136:FC706VJ:2:2104:15343:197393.hhhh" in orp
        assert "@seq2:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCAC" in orp