def test_preprocess_data(self):
    """Test functionality of preprocess_data().

    Calls preprocess_data() on the fixture directories with the
    extensions 'fa', 'fasta' and 'faa', then checks its three return
    values: the gene map, the reference database and the species count.
    """
    gene_map, ref_db, species = preprocess_data(
        self.working_dir,
        self.target_proteomes_dir,
        ['fa', 'fasta', 'faa'])

    # The expected map is bidirectional: every label maps to an id and
    # every id maps back to its label.
    gene_map_exp = {
        'G1_SE001': '0_0', 'G1_SE002': '1_0',
        'G1_SE003': '2_0', 'G1_SE004': '3_0',
        '0_0': 'G1_SE001', '1_0': 'G1_SE002',
        '2_0': 'G1_SE003', '3_0': 'G1_SE004',
        'G2_SE001': '0_1', 'G2_SE002': '1_1',
        'G2_SE003': '2_1', 'G2_SE004': '3_1',
        '0_1': 'G2_SE001', '1_1': 'G2_SE002',
        '2_1': 'G2_SE003', '3_1': 'G2_SE004',
        'G3_SE001': '0_2', 'G3_SE002': '1_2',
        'G3_SE003': '2_2', 'G3_SE004': '3_2',
        '0_2': 'G3_SE001', '1_2': 'G3_SE002',
        '2_2': 'G3_SE003', '3_2': 'G3_SE004',
        'G4_SE001': '0_3', 'G4_SE002': '1_3',
        'G4_SE003': '2_3', 'G4_SE004': '3_3',
        '0_3': 'G4_SE001', '1_3': 'G4_SE002',
        '2_3': 'G4_SE003', '3_3': 'G4_SE004',
        'G5_SE001': '0_4', 'G5_SE002': '1_4',
        'G5_SE003': '2_4', 'G5_SE004': '3_4',
        '0_4': 'G5_SE001', '1_4': 'G5_SE002',
        '2_4': 'G5_SE003', '3_4': 'G5_SE004',
    }

    # Build the expected reference database from the four species
    # FASTA fixtures, keyed by sequence label.
    species_fps = (self.species_1_fp,
                   self.species_2_fp,
                   self.species_3_fp,
                   self.species_4_fp)
    ref_db_exp = {}
    for species_fp in species_fps:
        # Default text mode: Python 3 translates newlines by default,
        # and the 'U' flag was removed in Python 3.11 (ValueError).
        with open(species_fp) as fh:
            for label, seq in parse_fasta(fh):
                ref_db_exp[label] = seq

    num_species_exp = 4

    self.assertDictEqual(gene_map, gene_map_exp)
    self.assertDictEqual(ref_db, ref_db_exp)
    self.assertEqual(species, num_species_exp)
def test_preprocess_data(self):
    """Verify the three values returned by preprocess_data().

    The gene map, the reference database and the species count are each
    compared against expectations built from the test fixtures.
    """
    actual_gene_map, actual_ref_db, actual_num_species = preprocess_data(
        self.working_dir,
        self.target_proteomes_dir,
        ['fa', 'fasta', 'faa'])

    # Two-way lookup: label -> id and id -> label.
    expected_gene_map = {
        'G1_SE001': '0_0', 'G1_SE002': '1_0',
        'G1_SE003': '2_0', 'G1_SE004': '3_0',
        '0_0': 'G1_SE001', '1_0': 'G1_SE002',
        '2_0': 'G1_SE003', '3_0': 'G1_SE004',
        'G2_SE001': '0_1', 'G2_SE002': '1_1',
        'G2_SE003': '2_1', 'G2_SE004': '3_1',
        '0_1': 'G2_SE001', '1_1': 'G2_SE002',
        '2_1': 'G2_SE003', '3_1': 'G2_SE004',
        'G3_SE001': '0_2', 'G3_SE002': '1_2',
        'G3_SE003': '2_2', 'G3_SE004': '3_2',
        '0_2': 'G3_SE001', '1_2': 'G3_SE002',
        '2_2': 'G3_SE003', '3_2': 'G3_SE004',
        'G4_SE001': '0_3', 'G4_SE002': '1_3',
        'G4_SE003': '2_3', 'G4_SE004': '3_3',
        '0_3': 'G4_SE001', '1_3': 'G4_SE002',
        '2_3': 'G4_SE003', '3_3': 'G4_SE004',
        'G5_SE001': '0_4', 'G5_SE002': '1_4',
        'G5_SE003': '2_4', 'G5_SE004': '3_4',
        '0_4': 'G5_SE001', '1_4': 'G5_SE002',
        '2_4': 'G5_SE003', '3_4': 'G5_SE004',
    }

    # Collect every sequence from the four species FASTA files, keyed
    # by the id stored in the sequence metadata.
    fasta_fps = (self.species_1_fp,
                 self.species_2_fp,
                 self.species_3_fp,
                 self.species_4_fp)
    expected_ref_db = {}
    for fasta_fp in fasta_fps:
        for record in skbio.io.read(fasta_fp, format='fasta'):
            expected_ref_db[record.metadata['id']] = record

    expected_num_species = 4

    self.assertDictEqual(actual_gene_map, expected_gene_map)
    self.assertDictEqual(actual_ref_db, expected_ref_db)
    self.assertEqual(actual_num_species, expected_num_species)
def test_preprocess_data(self):
    """Exercise preprocess_data() and check all three of its outputs.

    Expected values: a label <-> id gene map, a reference database read
    from the four species FASTA fixtures, and a species count of 4.
    """
    result_gene_map, result_ref_db, result_species = preprocess_data(
        self.working_dir,
        self.target_proteomes_dir,
        ['fa', 'fasta', 'faa'])

    # Each label appears as a key mapping to its id, and each id appears
    # as a key mapping back to the label.
    want_gene_map = {
        'G1_SE001': '0_0', 'G1_SE002': '1_0',
        'G1_SE003': '2_0', 'G1_SE004': '3_0',
        '0_0': 'G1_SE001', '1_0': 'G1_SE002',
        '2_0': 'G1_SE003', '3_0': 'G1_SE004',
        'G2_SE001': '0_1', 'G2_SE002': '1_1',
        'G2_SE003': '2_1', 'G2_SE004': '3_1',
        '0_1': 'G2_SE001', '1_1': 'G2_SE002',
        '2_1': 'G2_SE003', '3_1': 'G2_SE004',
        'G3_SE001': '0_2', 'G3_SE002': '1_2',
        'G3_SE003': '2_2', 'G3_SE004': '3_2',
        '0_2': 'G3_SE001', '1_2': 'G3_SE002',
        '2_2': 'G3_SE003', '3_2': 'G3_SE004',
        'G4_SE001': '0_3', 'G4_SE002': '1_3',
        'G4_SE003': '2_3', 'G4_SE004': '3_3',
        '0_3': 'G4_SE001', '1_3': 'G4_SE002',
        '2_3': 'G4_SE003', '3_3': 'G4_SE004',
        'G5_SE001': '0_4', 'G5_SE002': '1_4',
        'G5_SE003': '2_4', 'G5_SE004': '3_4',
        '0_4': 'G5_SE001', '1_4': 'G5_SE002',
        '2_4': 'G5_SE003', '3_4': 'G5_SE004',
    }

    # One pass over the species files, in order; later files overwrite
    # earlier entries that share an id, as sequential assignment would.
    proteome_fps = [
        self.species_1_fp,
        self.species_2_fp,
        self.species_3_fp,
        self.species_4_fp,
    ]
    want_ref_db = {
        entry.metadata['id']: entry
        for proteome_fp in proteome_fps
        for entry in skbio.io.read(proteome_fp, format='fasta')
    }

    want_species = 4

    self.assertDictEqual(result_gene_map, want_gene_map)
    self.assertDictEqual(result_ref_db, want_ref_db)
    self.assertEqual(result_species, want_species)