def _test_filter_duplicates(paired_reads, n_seqs_packet):
    """Check that filter_duplicates keeps every non-duplicated read once.

    FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3 is written
    to a temporary file and run through filter_duplicates. Every pair read
    from FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3 must then be found
    exactly once in the filtered output.

    paired_reads is forwarded to filter_duplicates and to _read_pairs.
    n_seqs_packet must be an int or None and is forwarded to
    filter_duplicates.

    Raises AssertionError if n_seqs_packet has the wrong type or if an
    expected pair is missing from, or repeated in, the output.
    """
    assert isinstance(n_seqs_packet, int) or n_seqs_packet is None

    fastq_with_dups = (FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 +
                       FASTQ_NO_DUPS3)
    fastq_no_dups = FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3

    # Context managers close (and so delete) both temporary files even when
    # an assertion or the filter itself raises.
    with NamedTemporaryFile() as tmp_in:
        tmp_in.write(fastq_with_dups)
        tmp_in.flush()
        with NamedTemporaryFile() as out_fhand:
            # The input is reopened by name to get a separate read handle;
            # the temp file object is kept alive until the filter is done.
            with open(tmp_in.name) as in_fhand:
                filter_duplicates([in_fhand], out_fhand, paired_reads,
                                  n_seqs_packet)
                flush_fhand(out_fhand)
            with open(out_fhand.name) as result_fhand:
                filtered_pairs = list(_read_pairs([result_fhand],
                                                  paired_reads))

    expected_pairs = list(_read_pairs([StringIO(fastq_no_dups)],
                                      paired_reads))

    # Only the presence of each expected pair is verified. The output is not
    # required to have the same length as expected_pairs (that stricter check
    # was disabled in the original test).
    for expected in expected_pairs:
        matches = sum(1 for found in filtered_pairs
                      if _seqitem_pairs_equal(expected, found))
        assert matches == 1
def _test_filter_duplicates(paired_reads, n_seqs_packet):
    """Check filter_duplicates with and without the use_length option.

    Three scenarios are run, each through temporary files:

    1. FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3 is
       filtered with the given paired_reads; every pair read from
       FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3 must appear exactly
       once in the output.
    2. FASTQ_DUPS filtered as unpaired reads with use_length=10 must leave
       two reads.
    3. FASTQ_DUPS filtered as unpaired reads with use_length=1 must leave
       one read.

    n_seqs_packet must be an int or None and is forwarded to
    filter_duplicates in all three scenarios.

    Raises AssertionError when any of the checks above fails.
    """
    assert isinstance(n_seqs_packet, int) or n_seqs_packet is None

    def _filter_fastq(fastq, read_as_pairs, *filter_args, **filter_kwargs):
        """Filter fastq through temp files and return the pairs read back.

        filter_args and filter_kwargs are handed to filter_duplicates after
        the input and output handles; read_as_pairs is handed to _read_pairs.
        """
        # Context managers close (and so delete) every handle even when the
        # filter raises; the original leaked all of them.
        with NamedTemporaryFile() as tmp_in:
            tmp_in.write(fastq)
            tmp_in.flush()
            with NamedTemporaryFile() as out_fhand:
                # Reopen the input by name to get a separate read handle.
                with open(tmp_in.name) as in_fhand:
                    filter_duplicates([in_fhand], out_fhand, *filter_args,
                                      **filter_kwargs)
                    flush_fhand(out_fhand)
                with open(out_fhand.name) as result_fhand:
                    return list(_read_pairs([result_fhand], read_as_pairs))

    # Scenario 1: duplicates embedded between non-duplicated reads.
    fastq_with_dups = (FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 +
                       FASTQ_NO_DUPS3)
    filtered_pairs = _filter_fastq(fastq_with_dups, paired_reads,
                                   paired_reads, n_seqs_packet)

    fastq_no_dups = FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3
    expected_pairs = list(_read_pairs([StringIO(fastq_no_dups)],
                                      paired_reads))

    # Only the presence of each expected pair is verified. The output is not
    # required to have the same length as expected_pairs (that stricter check
    # was disabled in the original test).
    for expected in expected_pairs:
        matches = sum(1 for found in filtered_pairs
                      if _seqitem_pairs_equal(expected, found))
        assert matches == 1

    # Scenarios 2 and 3: use_length.
    # NOTE(review): presumably use_length limits how much of each read is
    # compared -- confirm against filter_duplicates.
    filtered_pairs = _filter_fastq(FASTQ_DUPS, False, paired_reads=False,
                                   n_seqs_packet=n_seqs_packet,
                                   use_length=10)
    assert len(filtered_pairs) == 2

    filtered_pairs = _filter_fastq(FASTQ_DUPS, False, paired_reads=False,
                                   n_seqs_packet=n_seqs_packet,
                                   use_length=1)
    assert len(filtered_pairs) == 1
def test_seqitem_pairs_equal(self):
    """Exercise _seqitem_pairs_equal on pairs and single-read lists.

    The test expects reads whose sequence and quality lines match to compare
    equal even when their names differ, and expects a mismatch when the
    reads differ, when the order inside the pair is swapped, or when the two
    arguments do not have the same shape.
    """
    def wrap(name, lines):
        # Every read used here is a FASTQ SeqItem inside a SeqWrapper.
        return SeqWrapper(SEQITEM, SeqItem(name, lines), 'fastq')

    read_a = wrap('seq1', ['@seq1\n', 'TAATAC\n', '+\n', 'TTTDFG\n'])
    read_b = wrap('seq2', ['@seq2\n', 'TCATTA\n', '+\n', 'ABCBEG\n'])
    # Same sequence and quality lines as read_a, different name.
    read_a_twin = wrap('seq3', ['@seq3\n', 'TAATAC\n', '+\n', 'TTTDFG\n'])
    read_c = wrap('seq4', ['@seq4\n', 'ACGCGT\n', '+\n', 'ABCBEG\n'])

    reference = (read_a, read_b)
    different_reads = (read_b, read_c)
    same_content = (read_a_twin, read_b)
    swapped = (read_b, read_a)

    # Two-read pairs.
    assert _seqitem_pairs_equal(reference, same_content)
    assert not _seqitem_pairs_equal(reference, different_reads)
    assert not _seqitem_pairs_equal(reference, swapped)

    # Single-read lists.
    assert _seqitem_pairs_equal([read_a], [read_a_twin])
    assert not _seqitem_pairs_equal([read_a], [read_b])

    # Arguments of different shape.
    assert not _seqitem_pairs_equal([read_a], reference)
    assert not _seqitem_pairs_equal(reference, read_b)
def test_seqitem_pairs_equal(self):
    """Run _seqitem_pairs_equal over a table of argument pairs.

    Four FASTQ reads are built; seq1 and seq3 share their sequence and
    quality lines and differ only in name. Each row of the table gives the
    two arguments and whether the comparison is expected to be truthy.
    """
    fastq_records = (
        ('seq1', ['@seq1\n', 'TAATAC\n', '+\n', 'TTTDFG\n']),
        ('seq2', ['@seq2\n', 'TCATTA\n', '+\n', 'ABCBEG\n']),
        ('seq3', ['@seq3\n', 'TAATAC\n', '+\n', 'TTTDFG\n']),
        ('seq4', ['@seq4\n', 'ACGCGT\n', '+\n', 'ABCBEG\n']),
    )
    seq1, seq2, seq3, seq4 = [
        SeqWrapper(SEQITEM, SeqItem(name, lines), 'fastq')
        for name, lines in fastq_records
    ]

    pair1 = (seq1, seq2)
    pair2 = (seq2, seq4)
    pair3 = (seq3, seq2)
    pair4 = (seq2, seq1)

    # (left argument, right argument, expected to match)
    cases = (
        (pair1, pair3, True),
        (pair1, pair2, False),
        (pair1, pair4, False),
        ([seq1], [seq3], True),
        ([seq1], [seq2], False),
        ([seq1], pair1, False),
        (pair1, seq2, False),
    )
    for left, right, should_match in cases:
        outcome = _seqitem_pairs_equal(left, right)
        if should_match:
            assert outcome
        else:
            assert not outcome
def test_seqitem_pairs_equal(self):
    """Check _seqitem_pairs_equal on pairs and single-read lists.

    seq1 and seq3 share their sequence and quality lines and differ only in
    name, so the test expects them to compare equal. A mismatch is expected
    when the reads differ, when the order inside the pair is swapped, or
    when the two arguments do not have the same shape.

    Negative cases use ``assert not`` rather than ``== False`` (PEP 8), so
    any falsy result is accepted for them.
    """
    seq1 = SeqWrapper(SEQITEM,
                      SeqItem("seq1",
                              ["@seq1\n", "TAATAC\n", "+\n", "TTTDFG\n"]),
                      "fastq")
    seq2 = SeqWrapper(SEQITEM,
                      SeqItem("seq2",
                              ["@seq2\n", "TCATTA\n", "+\n", "ABCBEG\n"]),
                      "fastq")
    seq3 = SeqWrapper(SEQITEM,
                      SeqItem("seq3",
                              ["@seq3\n", "TAATAC\n", "+\n", "TTTDFG\n"]),
                      "fastq")
    seq4 = SeqWrapper(SEQITEM,
                      SeqItem("seq4",
                              ["@seq4\n", "ACGCGT\n", "+\n", "ABCBEG\n"]),
                      "fastq")

    pair1 = (seq1, seq2)
    pair2 = (seq2, seq4)
    pair3 = (seq3, seq2)
    pair4 = (seq2, seq1)

    # Two-read pairs.
    assert _seqitem_pairs_equal(pair1, pair3)
    assert not _seqitem_pairs_equal(pair1, pair2)
    assert not _seqitem_pairs_equal(pair1, pair4)

    # Single-read lists.
    assert _seqitem_pairs_equal([seq1], [seq3])
    assert not _seqitem_pairs_equal([seq1], [seq2])

    # Arguments of different shape.
    assert not _seqitem_pairs_equal([seq1], pair1)
    assert not _seqitem_pairs_equal(pair1, seq2)