def test_dereplicate_seqs_remove_singletons(self):
    """Test dereplicate_seqs() called without ``min_size`` (singletons dropped).

    Writes seven reads to ``seqs.fasta`` in the working directory, runs
    dereplicate_seqs() with only the input and output paths, and checks that:

    * the dereplicated FASTA file and the ``seqs_derep.log`` file exist;
    * only the two sequences that occur more than once are reported
      (seq1 with size 3 and seq6 with size 2), in that order.
    """
    seqs = [("seq1", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
            ("seq2", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
            ("seq3", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
            ("seq4", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCT"),
            ("seq5", "TACCAGCCCCTTAAGTGGTAGGGACGATTATTTGGCCTAAAGCGTCCG"),
            ("seq6", "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT"),
            ("seq7", "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT")]
    seqs_fp = join(self.working_dir, "seqs.fasta")
    with open(seqs_fp, 'w') as seqs_f:
        for seq in seqs:
            seqs_f.write(">%s\n%s\n" % seq)

    output_fp = join(self.working_dir, "seqs_derep.fasta")
    # NOTE(review): the log path is never passed to dereplicate_seqs();
    # presumably it is derived from output_fp -- confirm against the
    # implementation.
    log_fp = join(self.working_dir, "seqs_derep.log")

    dereplicate_seqs(seqs_fp=seqs_fp, output_fp=output_fp)
    self.assertTrue(isfile(output_fp))
    self.assertTrue(isfile(log_fp))

    exp = [("seq1;size=3;",
            "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
           ("seq6;size=2;",
            "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT")]

    # Open in plain read mode: the legacy 'U' flag was deprecated and is
    # rejected with ValueError by open() on Python 3.11+. Text mode already
    # applies universal-newline handling on Python 3.
    with open(output_fp, 'r') as out_f:
        act = list(parse_fasta(out_f))
    self.assertEqual(act, exp)
def test_dereplicate_seqs(self):
    """Test dereplicate_seqs() with ``min_size=1`` so singletons are kept.

    Seven reads covering four distinct sequences are written to a FASTA
    file; the test asserts that the dereplicated output exists and lists
    every distinct sequence with its abundance, in the order
    seq1 (3), seq6 (2), seq4 (1), seq5 (1).
    """
    # The four distinct sequences used by this test.
    abundant = "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"
    variant = "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCT"
    lone = "TACCAGCCCCTTAAGTGGTAGGGACGATTATTTGGCCTAAAGCGTCCG"
    paired = "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT"

    records = [
        ("seq1", abundant),
        ("seq2", abundant),
        ("seq3", abundant),
        ("seq4", variant),
        ("seq5", lone),
        ("seq6", paired),
        ("seq7", paired),
    ]

    input_path = join(self.working_dir, "seqs.fasta")
    derep_path = join(self.working_dir, "seqs_derep.fasta")

    # Serialise the reads as two-line FASTA records.
    with open(input_path, 'w') as fasta_f:
        fasta_f.writelines(">%s\n%s\n" % record for record in records)

    dereplicate_seqs(seqs_fp=input_path, output_fp=derep_path, min_size=1)
    self.assertTrue(isfile(derep_path))

    expected = [
        ("seq1;size=3", abundant),
        ("seq6;size=2", paired),
        ("seq4;size=1", variant),
        ("seq5;size=1", lone),
    ]
    observed = list(sequence_generator(derep_path))
    self.assertEqual(observed, expected)