def test_preprocess_data(self):
     """ Test functionality of preprocess_data()

     Calls preprocess_data() with self.working_dir,
     self.target_proteomes_dir and the extensions 'fa', 'fasta' and
     'faa', then checks the three returned values against:
       * an expected gene map in which every key also appears as the
         value of its counterpart entry (label -> ID and ID -> label),
       * a label -> sequence dictionary built by running parse_fasta()
         over the four species FASTA files held on the test case,
       * an expected species count of 4.
     """
     gene_map, ref_db, species = preprocess_data(self.working_dir,
                                                 self.target_proteomes_dir,
                                                 ['fa', 'fasta', 'faa'])
     gene_map_exp = {'G1_SE001': '0_0', 'G1_SE002': '1_0',
                     'G1_SE003': '2_0', 'G1_SE004': '3_0',
                     '0_0': 'G1_SE001', '1_0': 'G1_SE002',
                     '2_0': 'G1_SE003', '3_0': 'G1_SE004',
                     'G2_SE001': '0_1', 'G2_SE002': '1_1',
                     'G2_SE003': '2_1', 'G2_SE004': '3_1',
                     '0_1': 'G2_SE001', '1_1': 'G2_SE002',
                     '2_1': 'G2_SE003', '3_1': 'G2_SE004',
                     'G3_SE001': '0_2', 'G3_SE002': '1_2',
                     'G3_SE003': '2_2', 'G3_SE004': '3_2',
                     '0_2': 'G3_SE001', '1_2': 'G3_SE002',
                     '2_2': 'G3_SE003', '3_2': 'G3_SE004',
                     'G4_SE001': '0_3', 'G4_SE002': '1_3',
                     'G4_SE003': '2_3', 'G4_SE004': '3_3',
                     '0_3': 'G4_SE001', '1_3': 'G4_SE002',
                     '2_3': 'G4_SE003', '3_3': 'G4_SE004',
                     'G5_SE001': '0_4', 'G5_SE002': '1_4',
                     'G5_SE003': '2_4', 'G5_SE004': '3_4',
                     '0_4': 'G5_SE001', '1_4': 'G5_SE002',
                     '2_4': 'G5_SE003', '3_4': 'G5_SE004'}
     ref_db_exp = {}
     # The files are read in species order, so a label repeated in a later
     # file overwrites the earlier entry.
     species_fps = (self.species_1_fp, self.species_2_fp,
                    self.species_3_fp, self.species_4_fp)
     for species_fp in species_fps:
         # The 'U' mode flag was removed in Python 3.11, where open()
         # rejects it with ValueError; plain text mode already applies
         # universal-newline translation on Python 3.
         with open(species_fp, 'r') as fh:
             for label, seq in parse_fasta(fh):
                 ref_db_exp[label] = seq
     num_species_exp = 4
     self.assertDictEqual(gene_map, gene_map_exp)
     self.assertDictEqual(ref_db, ref_db_exp)
     self.assertEqual(species, num_species_exp)
 def test_preprocess_data(self):
     """ Check the three values returned by preprocess_data()

     preprocess_data() is given self.working_dir,
     self.target_proteomes_dir and the extension list
     ['fa', 'fasta', 'faa']. Its results are compared with an expected
     two-way gene map, a dictionary of the records read with
     skbio.io.read() from the four species FASTA files (keyed by each
     record's metadata 'id'), and an expected species count of 4.
     """
     gene_map, ref_db, species = preprocess_data(
         self.working_dir,
         self.target_proteomes_dir,
         ['fa', 'fasta', 'faa'])

     # Forward direction only; the inverse entries are derived below so
     # that the two directions cannot drift apart.
     label_to_id = {
         'G1_SE001': '0_0', 'G1_SE002': '1_0',
         'G1_SE003': '2_0', 'G1_SE004': '3_0',
         'G2_SE001': '0_1', 'G2_SE002': '1_1',
         'G2_SE003': '2_1', 'G2_SE004': '3_1',
         'G3_SE001': '0_2', 'G3_SE002': '1_2',
         'G3_SE003': '2_2', 'G3_SE004': '3_2',
         'G4_SE001': '0_3', 'G4_SE002': '1_3',
         'G4_SE003': '2_3', 'G4_SE004': '3_3',
         'G5_SE001': '0_4', 'G5_SE002': '1_4',
         'G5_SE003': '2_4', 'G5_SE004': '3_4',
     }
     expected_gene_map = dict(label_to_id)
     expected_gene_map.update(
         (short_id, label) for label, short_id in label_to_id.items())

     # Collect every record from the species files, in species order, so a
     # repeated id keeps the record from the later file.
     expected_ref_db = {}
     fasta_fps = [self.species_1_fp, self.species_2_fp,
                  self.species_3_fp, self.species_4_fp]
     for fasta_fp in fasta_fps:
         for record in skbio.io.read(fasta_fp, format='fasta'):
             expected_ref_db[record.metadata['id']] = record

     expected_num_species = 4
     self.assertDictEqual(gene_map, expected_gene_map)
     self.assertDictEqual(ref_db, expected_ref_db)
     self.assertEqual(species, expected_num_species)
 def test_preprocess_data(self):
     """ Verify the gene map, reference DB and species count produced by
     preprocess_data()

     The function under test receives self.working_dir,
     self.target_proteomes_dir and the extensions 'fa', 'fasta', 'faa'.
     The expected reference DB holds every record that skbio.io.read()
     yields from the four species FASTA files, keyed by the record's
     metadata 'id'; the expected species count is 4.
     """
     accepted_extensions = ['fa', 'fasta', 'faa']
     gene_map, ref_db, species = preprocess_data(
         self.working_dir, self.target_proteomes_dir, accepted_extensions)

     expected_map = {
         # label -> ID entries
         'G1_SE001': '0_0', 'G1_SE002': '1_0',
         'G1_SE003': '2_0', 'G1_SE004': '3_0',
         'G2_SE001': '0_1', 'G2_SE002': '1_1',
         'G2_SE003': '2_1', 'G2_SE004': '3_1',
         'G3_SE001': '0_2', 'G3_SE002': '1_2',
         'G3_SE003': '2_2', 'G3_SE004': '3_2',
         'G4_SE001': '0_3', 'G4_SE002': '1_3',
         'G4_SE003': '2_3', 'G4_SE004': '3_3',
         'G5_SE001': '0_4', 'G5_SE002': '1_4',
         'G5_SE003': '2_4', 'G5_SE004': '3_4',
         # ID -> label entries (inverse of the block above)
         '0_0': 'G1_SE001', '1_0': 'G1_SE002',
         '2_0': 'G1_SE003', '3_0': 'G1_SE004',
         '0_1': 'G2_SE001', '1_1': 'G2_SE002',
         '2_1': 'G2_SE003', '3_1': 'G2_SE004',
         '0_2': 'G3_SE001', '1_2': 'G3_SE002',
         '2_2': 'G3_SE003', '3_2': 'G3_SE004',
         '0_3': 'G4_SE001', '1_3': 'G4_SE002',
         '2_3': 'G4_SE003', '3_3': 'G4_SE004',
         '0_4': 'G5_SE001', '1_4': 'G5_SE002',
         '2_4': 'G5_SE003', '3_4': 'G5_SE004',
     }

     # Files are visited in species order; a later record with a repeated
     # id replaces the earlier one, as with sequential assignment.
     species_files = (self.species_1_fp, self.species_2_fp,
                      self.species_3_fp, self.species_4_fp)
     expected_db = {
         entry.metadata['id']: entry
         for path in species_files
         for entry in skbio.io.read(path, format='fasta')
     }

     self.assertDictEqual(gene_map, expected_map)
     self.assertDictEqual(ref_db, expected_db)
     self.assertEqual(species, 4)