def test_parse_metadata_state_descriptions(self):
    """Check the category -> set-of-states dict parsed from description strings."""
    # An empty description is expected to produce an empty dict.
    self.assertEqual(parse_metadata_state_descriptions(''), {})

    # Plain case: ';' separates categories, ':' separates the category
    # from its states, ',' separates the states.
    description = 'Study:Twin,Hand,Dog;BodySite:Palm,Stool'
    expected = {
        'Study': set(['Twin', 'Hand', 'Dog']),
        'BodySite': set(['Palm', 'Stool']),
    }
    self.assertEqual(parse_metadata_state_descriptions(description), expected)

    # State values that themselves contain colons (ontology-derived terms):
    # only the first ':' of each category is expected to act as separator.
    description = (
        'Study:Twin,Hand,Dog;site:UBERON:feces,UBERON:ear canal;'
        'env_feature:ENVO:farm soil,ENVO:national park'
    )
    expected = {
        'Study': set(['Twin', 'Hand', 'Dog']),
        'site': set(['UBERON:feces', 'UBERON:ear canal']),
        'env_feature': set(['ENVO:farm soil', 'ENVO:national park']),
    }
    self.assertEqual(parse_metadata_state_descriptions(description), expected)

    # Values carrying parentheses, an embedded ': ', periods and an
    # apostrophe must come back untouched.
    description = (
        "Treatment:A,B,C;env_matter:ENVO:nitsol,ENVO:farm soil;env_biom:"
        "ENVO:Tropical dry (including Monsoon forests) and woodlands,"
        "ENVO:Forest: including woodlands;country:GAZ:Persnickety Islands"
        ",St. Kitt's and Nevis"
    )
    expected = {
        "country": set(["GAZ:Persnickety Islands", "St. Kitt's and Nevis"]),
        "env_biom": set([
            "ENVO:Tropical dry (including Monsoon forests) "
            "and woodlands",
            "ENVO:Forest: including woodlands",
        ]),
        "env_matter": set(["ENVO:nitsol", "ENVO:farm soil"]),
        'Treatment': set(["A", "B", "C"]),
    }
    self.assertEqual(parse_metadata_state_descriptions(description), expected)
def test_get_sample_ids(self):
    """Check which sample ids get_sample_ids selects for several state descriptions."""
    # (state description, sample ids expected from the fixture held in
    # self.map_data / self.map_headers), asserted in this order.
    # NOTE(review): '*' and '!Dog' look like "any value" and "exclude Dog";
    # confirm against get_sample_ids.
    cases = [
        ('Study:Twin', []),
        ('Study:Dog', ['a', 'b']),
        ('Study:*,!Dog', ['c', 'd', 'e']),
        ('Study:*,!Dog;BodySite:Stool', ['e']),
        ('BodySite:Stool', ['a', 'b', 'e']),
    ]
    for description, expected_ids in cases:
        valid_states = parse_metadata_state_descriptions(description)
        observed_ids = get_sample_ids(
            self.map_data, self.map_headers, valid_states)
        self.assertEqual(observed_ids, expected_ids)
def test_parse_metadata_state_descriptions(self):
    """Verify description strings are parsed into a dict of category -> set of states."""
    # Each entry pairs an input description with the dict it must parse to;
    # entries are asserted in order.
    cases = [
        # Empty input gives an empty dict.
        ("", {}),
        # Two ordinary categories.
        (
            "Study:Twin,Hand,Dog;BodySite:Palm,Stool",
            {
                "Study": set(["Twin", "Hand", "Dog"]),
                "BodySite": set(["Palm", "Stool"]),
            },
        ),
        # State values that contain colons themselves (ontology-derived).
        (
            "Study:Twin,Hand,Dog;site:UBERON:feces,UBERON:ear canal;"
            "env_feature:ENVO:farm soil,ENVO:national park",
            {
                "Study": set(["Twin", "Hand", "Dog"]),
                "site": set(["UBERON:feces", "UBERON:ear canal"]),
                "env_feature": set(["ENVO:farm soil", "ENVO:national park"]),
            },
        ),
        # Values with parentheses, an embedded ': ', periods and an apostrophe.
        (
            "Treatment:A,B,C;env_matter:ENVO:nitsol,ENVO:farm soil;env_biom:"
            "ENVO:Tropical dry (including Monsoon forests) and woodlands,"
            "ENVO:Forest: including woodlands;country:GAZ:Persnickety Islands"
            ",St. Kitt's and Nevis",
            {
                "country": set(
                    ["GAZ:Persnickety Islands", "St. Kitt's and Nevis"]
                ),
                "env_biom": set(
                    [
                        "ENVO:Tropical dry (including Monsoon forests) "
                        "and woodlands",
                        "ENVO:Forest: including woodlands",
                    ]
                ),
                "env_matter": set(["ENVO:nitsol", "ENVO:farm soil"]),
                "Treatment": set(["A", "B", "C"]),
            },
        ),
    ]
    for description, expected in cases:
        self.assertEqual(parse_metadata_state_descriptions(description), expected)
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Return the sample ids selected by a metadata state description.

    The mapping file is read with parse_mapping_file, the description is
    turned into valid states with parse_metadata_state_descriptions, and
    both are handed to get_sample_ids.

    mapping_f: mapping file input, passed unchanged to parse_mapping_file.
    valid_states_str: state description string, e.g.
        'Study:Twin,Hand,Dog;BodySite:Palm,Stool'.

    Returns whatever get_sample_ids returns (a collection of sample ids).

    Raises ValueError if no sample id matches the description.
    """
    # Comments from the mapping file are not needed here.
    map_data, map_header, _ = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    if len(sample_ids) < 1:
        # Call-style raise: valid on both Python 2 and Python 3, unlike the
        # Python-2-only "raise ValueError, msg" statement form.
        raise ValueError(
            "All samples have been filtered out for the criteria"
            " described in the valid states")

    return sample_ids
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Given a description of metadata, return the corresponding sample ids.

    Steps: parse the mapping file (parse_mapping_file), parse the state
    description (parse_metadata_state_descriptions), then select the
    matching ids (get_sample_ids).

    mapping_f: mapping file input, forwarded as-is to parse_mapping_file.
    valid_states_str: string describing the accepted metadata states,
        e.g. 'Study:Twin,Hand,Dog;BodySite:Palm,Stool'.

    Returns the result of get_sample_ids for the parsed inputs.

    Raises ValueError when the selection is empty.
    """
    # The third element (mapping file comments) is unused.
    map_data, map_header, _ = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    if len(sample_ids) < 1:
        # Instantiate the exception explicitly; the comma form
        # ("raise ValueError, msg") is a SyntaxError on Python 3.
        raise ValueError(
            "All samples have been filtered out for the criteria"
            " described in the valid states"
        )

    return sample_ids