class TestFastaParser(unittest.TestCase):
    """Unit tests for ``FastaParser``: entry iteration and header helpers."""

    def setUp(self):
        """Build a fresh parser and example data set before every test."""
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def test_parse_1(self):
        # The first example data set must come back as exactly these
        # (header, sequence) pairs, in this order.
        handle = StringIO(self.example_data.fasta_seqs_1)
        parsed = list(self.fasta_parser.entries(handle))
        expected = [
            ('test_1 a random sequence', 'TTTAGAAATTACACA'),
            ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
            ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
        ]
        self.assertEqual(parsed, expected)

    def test_parse_empty_file(self):
        # Empty input must produce no entries at all.
        handle = StringIO("")
        parsed = list(self.fasta_parser.entries(handle))
        self.assertEqual(parsed, [])

    def test_single_entry_file_header(self):
        # The header of the second example data set is returned as one string.
        handle = StringIO(self.example_data.fasta_seqs_2)
        header = self.fasta_parser.single_entry_file_header(handle)
        self.assertEqual(header, "test_4 a random sequence")

    def test_header_id_1(self):
        # Space-separated header: only the leading token is the ID.
        seq_id = self.fasta_parser.header_id("seq_10101 An important protein")
        self.assertEqual(seq_id, "seq_10101")

    def test_header_id_2(self):
        # Tab-separated header: only the leading token is the ID.
        seq_id = self.fasta_parser.header_id("seq_10101\tAn important protein")
        self.assertEqual(seq_id, "seq_10101")
def _ref_ids_to_file(self, ref_seq_paths):
    """Map reference sequence IDs to the base names of their files.

    Each file in ``ref_seq_paths`` is opened, its header is read with
    ``FastaParser.single_entry_file_header`` and reduced to an ID with
    ``FastaParser.header_id``.

    Args:
        ref_seq_paths: Iterable of paths to reference sequence files.
            NOTE(review): each file presumably holds a single FASTA
            entry -- confirm against the parser.

    Returns:
        A dict mapping each ID to ``os.path.basename`` of the path it
        was read from. If several files yield the same ID, the last
        one in ``ref_seq_paths`` wins.
    """
    file_name_by_id = {}
    parser = FastaParser()
    for path in ref_seq_paths:
        file_name = os.path.basename(path)
        with open(path) as fasta_fh:
            header = parser.single_entry_file_header(fasta_fh)
            seq_id = parser.header_id(header)
        file_name_by_id[seq_id] = file_name
    return file_name_by_id
class TestFastaParser(unittest.TestCase):
    """Exercise ``FastaParser`` entry parsing and header handling."""

    def setUp(self):
        """Provide a parser instance and the example data to each test."""
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def _entries_from(self, fasta_text):
        """Return the list of entries the parser produces for ``fasta_text``."""
        fasta_fh = StringIO(fasta_text)
        return list(self.fasta_parser.entries(fasta_fh))

    def test_parse_1(self):
        # All three records of the first example data set are expected,
        # as (header, sequence) tuples in this order.
        observed = self._entries_from(self.example_data.fasta_seqs_1)
        wanted = [
            ("test_1 a random sequence", "TTTAGAAATTACACA"),
            ("test_2 another random sequence", "ACGAGAAATTAAATTAAATT"),
            ("test_3 another random sequence", "TAGAGACATTGGATTTTATT"),
        ]
        self.assertEqual(observed, wanted)

    def test_parse_empty_file(self):
        # Nothing in, nothing out.
        self.assertEqual(self._entries_from(""), [])

    def test_single_entry_file_header(self):
        # The header of the second example data set comes back as one string.
        fasta_fh = StringIO(self.example_data.fasta_seqs_2)
        observed = self.fasta_parser.single_entry_file_header(fasta_fh)
        self.assertEqual(observed, "test_4 a random sequence")

    def test_header_id_1(self):
        # Header whose ID is followed by a space.
        header = "seq_10101 An important protein"
        self.assertEqual(self.fasta_parser.header_id(header), "seq_10101")

    def test_header_id_2(self):
        # Header whose ID is followed by a tab.
        header = "seq_10101\tAn important protein"
        self.assertEqual(self.fasta_parser.header_id(header), "seq_10101")