def test_population_string_parsing(self):
    """Check the mapping produced by ``VCF.parse_populations_list``.

    Parses ``self.populations_list`` and compares the result against the
    expected population-name -> sample-ID-list dictionary below.
    """
    expected = {
        'melpo': [
            'm523', 'm524', 'm525', 'm589', 'm675',
            'm676', 'm682', 'm683', 'm687', 'm689',
        ],
        'pachi': [
            'p516', 'p517', 'p518', 'p519', 'p520',
            'p591', 'p596', 'p690', 'p694', 'p696',
        ],
        'cydno': [
            'c511', 'c512', 'c513', 'c514', 'c515',
            'c563', 'c614', 'c630', 'c639', 'c640',
        ],
        'outgroups': ['h665', 'i02-210'],
    }

    parsed = VCF.parse_populations_list(self.populations_list)

    # Parsed value first, expected second, so failure output keeps its order.
    self.assertEqual(parsed, expected)
def test_header_vs_population_sample_ids(self):
    """Compare sample IDs from the populations argument with the VCF header.

    The sample IDs parsed from ``self.populations_list`` must match those
    found in the header built from ``self.bgzip_path``.

    NOTE: In practice the populations argument can contain fewer samples
    and populations than are actually contained in the VCF file.
    """
    vcf_header = VCF.make_empty_vcf_ordered_dict(self.bgzip_path)

    # Everything from position 9 onwards is treated as a sample ID; the
    # first nine entries are presumably the fixed VCF columns -- confirm
    # against ``make_empty_vcf_ordered_dict``.
    header_sample_ids = list(vcf_header)[9:]

    # Flatten the per-population sample lists into a single list of IDs.
    populations_sample_ids = []
    for sample_group in VCF.parse_populations_list(self.populations_list).values():
        populations_sample_ids.extend(sample_group)

    # Same distinct IDs on both sides ...
    self.assertEqual(set(header_sample_ids), set(populations_sample_ids))
    # ... and the same number of entries overall.
    self.assertEqual(len(header_sample_ids), len(populations_sample_ids))