Beispiel #1
0
class TestFastaParser(unittest.TestCase):
    """Unit tests for the FastaParser entry, header and header-ID helpers."""

    def setUp(self):
        """Create a fresh parser and example data set before every test."""
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def test_parse_1(self):
        """entries() on the fasta_seqs_1 example gives three tuples."""
        expected_entries = [
            ('test_1 a random sequence', 'TTTAGAAATTACACA'),
            ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
            ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
        ]
        handle = StringIO(self.example_data.fasta_seqs_1)
        parsed_entries = list(self.fasta_parser.entries(handle))
        self.assertEqual(parsed_entries, expected_entries)

    def test_parse_empty_file(self):
        """entries() on an empty input produces no entries."""
        handle = StringIO("")
        parsed_entries = list(self.fasta_parser.entries(handle))
        self.assertEqual(parsed_entries, [])

    def test_single_entry_file_header(self):
        """single_entry_file_header() returns the fasta_seqs_2 header."""
        handle = StringIO(self.example_data.fasta_seqs_2)
        header = self.fasta_parser.single_entry_file_header(handle)
        self.assertEqual(header, "test_4 a random sequence")

    def test_header_id_1(self):
        """header_id() returns the leading token of a space-separated header."""
        seq_id = self.fasta_parser.header_id("seq_10101 An important protein")
        self.assertEqual(seq_id, "seq_10101")

    def test_header_id_2(self):
        """header_id() returns the leading token of a tab-separated header."""
        seq_id = self.fasta_parser.header_id("seq_10101\tAn important protein")
        self.assertEqual(seq_id, "seq_10101")
Beispiel #2
0
 def _ref_ids_to_file(self, ref_seq_paths):
     """Map each reference sequence ID to the base name of its file.

     Every path in ``ref_seq_paths`` is opened and its header is read
     with ``FastaParser.single_entry_file_header``; the ID derived from
     that header via ``FastaParser.header_id`` becomes the key and the
     path's base name the value of the returned dictionary.
     """
     parser = FastaParser()
     file_name_by_id = {}
     for path in ref_seq_paths:
         file_name = os.path.basename(path)
         with open(path) as seq_fh:
             header = parser.single_entry_file_header(seq_fh)
             seq_id = parser.header_id(header)
             file_name_by_id[seq_id] = file_name
     return file_name_by_id
Beispiel #3
0
 def _ref_ids_to_file(self, ref_seq_paths):
     """Build a lookup from reference sequence ID to file base name.

     For each path in ``ref_seq_paths`` the file is opened, its header
     is fetched with ``single_entry_file_header`` and turned into an ID
     with ``header_id``. The returned dict maps that ID to
     ``os.path.basename`` of the path.
     """
     fasta = FastaParser()
     lookup = {}
     for seq_path in ref_seq_paths:
         base_name = os.path.basename(seq_path)
         with open(seq_path) as fh:
             entry_header = fasta.single_entry_file_header(fh)
             lookup[fasta.header_id(entry_header)] = base_name
     return lookup
Beispiel #4
0
class TestFastaParser(unittest.TestCase):
    """Tests covering FastaParser.entries, single_entry_file_header and header_id."""

    def setUp(self):
        """Provide a new FastaParser and ExampleData instance for each test."""
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def test_parse_1(self):
        """The fasta_seqs_1 example is parsed into three expected tuples."""
        source = StringIO(self.example_data.fasta_seqs_1)
        observed = list(self.fasta_parser.entries(source))
        expected = [
            ("test_1 a random sequence", "TTTAGAAATTACACA"),
            ("test_2 another random sequence", "ACGAGAAATTAAATTAAATT"),
            ("test_3 another random sequence", "TAGAGACATTGGATTTTATT"),
        ]
        self.assertEqual(observed, expected)

    def test_parse_empty_file(self):
        """An empty input yields an empty list of entries."""
        source = StringIO("")
        observed = list(self.fasta_parser.entries(source))
        self.assertEqual(observed, [])

    def test_single_entry_file_header(self):
        """The header of the fasta_seqs_2 example is returned as expected."""
        source = StringIO(self.example_data.fasta_seqs_2)
        observed = self.fasta_parser.single_entry_file_header(source)
        self.assertEqual(observed, "test_4 a random sequence")

    def test_header_id_1(self):
        """A header with a space after the ID yields just the ID."""
        observed = self.fasta_parser.header_id("seq_10101 An important protein")
        self.assertEqual(observed, "seq_10101")

    def test_header_id_2(self):
        """A header with a tab after the ID yields just the ID."""
        observed = self.fasta_parser.header_id("seq_10101\tAn important protein")
        self.assertEqual(observed, "seq_10101")