Example #1
0
    def test_parse_metadata_state_descriptions(self):
        """parse_metadata_state_descriptions should return correct states from string."""
        # An empty description is expected to produce an empty mapping.
        self.assertEqual(parse_metadata_state_descriptions(''), {})

        # Two plain categories, each followed by its comma-separated states.
        description = 'Study:Twin,Hand,Dog;BodySite:Palm,Stool'
        expected = {
            'Study': set(['Twin', 'Hand', 'Dog']),
            'BodySite': set(['Palm', 'Stool']),
        }
        observed = parse_metadata_state_descriptions(description)
        self.assertEqual(observed, expected)

        # State values that themselves contain colons (ontology-derived):
        # only the first colon of each category separates name from states.
        description = ('Study:Twin,Hand,Dog;site:UBERON:feces,UBERON:ear canal;'
                       'env_feature:ENVO:farm soil,ENVO:national park')
        expected = {
            'Study': set(['Twin', 'Hand', 'Dog']),
            'site': set(['UBERON:feces', 'UBERON:ear canal']),
            'env_feature': set(['ENVO:farm soil', 'ENVO:national park']),
        }
        observed = parse_metadata_state_descriptions(description)
        self.assertEqual(observed, expected)

        # Values containing parentheses, periods, apostrophes and extra colons.
        description = (
            "Treatment:A,B,C;env_matter:ENVO:nitsol,ENVO:farm soil;env_biom:"
            "ENVO:Tropical dry (including Monsoon forests) and woodlands,"
            "ENVO:Forest: including woodlands;country:GAZ:Persnickety Islands"
            ",St. Kitt's and Nevis"
        )
        expected = {
            "country": set(["GAZ:Persnickety Islands", "St. Kitt's and Nevis"]),
            "env_biom": set([
                "ENVO:Tropical dry (including Monsoon forests) "
                "and woodlands",
                "ENVO:Forest: including woodlands",
            ]),
            "env_matter": set(["ENVO:nitsol", "ENVO:farm soil"]),
            'Treatment': set(["A", "B", "C"]),
        }
        observed = parse_metadata_state_descriptions(description)
        self.assertEqual(observed, expected)
 def test_get_sample_ids(self):
     """get_sample_ids should return sample ids matching criteria."""
     # Each entry pairs a state description with the sample ids expected to
     # be selected from the fixture mapping data; entries are checked in order.
     expectations = [
         ('Study:Twin', []),
         ('Study:Dog', ['a', 'b']),
         ('Study:*,!Dog', ['c', 'd', 'e']),
         ('Study:*,!Dog;BodySite:Stool', ['e']),
         ('BodySite:Stool', ['a', 'b', 'e']),
     ]
     for description, expected_ids in expectations:
         observed_ids = get_sample_ids(
             self.map_data,
             self.map_headers,
             parse_metadata_state_descriptions(description),
         )
         self.assertEqual(observed_ids, expected_ids)
    def test_parse_metadata_state_descriptions(self):
        """parse_metadata_state_descriptions should return correct states from string."""
        # Table of (description string, mapping expected from parsing it).
        cases = [
            # empty description -> empty mapping
            ("", {}),
            # plain categories with comma-separated states
            (
                "Study:Twin,Hand,Dog;BodySite:Palm,Stool",
                {
                    "Study": set(["Twin", "Hand", "Dog"]),
                    "BodySite": set(["Palm", "Stool"]),
                },
            ),
            # state values with colons, i.e. ontology-derived
            (
                "Study:Twin,Hand,Dog;site:UBERON:feces,UBERON:ear canal;"
                "env_feature:ENVO:farm soil,ENVO:national park",
                {
                    "Study": set(["Twin", "Hand", "Dog"]),
                    "site": set(["UBERON:feces", "UBERON:ear canal"]),
                    "env_feature": set(["ENVO:farm soil", "ENVO:national park"]),
                },
            ),
            # values with parentheses, periods, apostrophes and extra colons
            (
                "Treatment:A,B,C;env_matter:ENVO:nitsol,ENVO:farm soil;env_biom:"
                "ENVO:Tropical dry (including Monsoon forests) and woodlands,"
                "ENVO:Forest: including woodlands;country:GAZ:Persnickety Islands"
                ",St. Kitt's and Nevis",
                {
                    "country": set(["GAZ:Persnickety Islands", "St. Kitt's and Nevis"]),
                    "env_biom": set(
                        [
                            "ENVO:Tropical dry (including Monsoon forests) and woodlands",
                            "ENVO:Forest: including woodlands",
                        ]
                    ),
                    "env_matter": set(["ENVO:nitsol", "ENVO:farm soil"]),
                    "Treatment": set(["A", "B", "C"]),
                },
            ),
        ]

        for description, expected in cases:
            self.assertEqual(parse_metadata_state_descriptions(description), expected)
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Return the sample ids matching a description of metadata states.

    mapping_f: mapping file, handed unchanged to parse_mapping_file.
    valid_states_str: metadata state description, handed unchanged to
        parse_metadata_state_descriptions.

    Returns whatever get_sample_ids selects from the parsed mapping data
    for the parsed states.

    Raises ValueError when no sample id satisfies the description.
    """
    # The comments returned alongside the mapping data are not needed here.
    map_data, map_header, _map_comments = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    if len(sample_ids) < 1:
        # Call-form raise: valid on both Python 2 and Python 3, unlike the
        # statement form ``raise ValueError, "..."``.
        raise ValueError("All samples have been filtered out for the criteria"
                         " described in the valid states")

    return sample_ids
Example #5
0
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Given a description of metadata, return the corresponding sample ids.

    mapping_f: mapping file, passed as-is to parse_mapping_file.
    valid_states_str: description of the accepted metadata states, passed
        as-is to parse_metadata_state_descriptions.

    Returns the sample ids produced by get_sample_ids for the parsed
    mapping data, header and valid states.

    Raises ValueError if the description filters out every sample.
    """
    # Only the data rows and header are used; the comments are discarded.
    map_data, map_header, _map_comments = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    if len(sample_ids) < 1:
        # Exception is instantiated explicitly so the statement parses on
        # Python 3 as well as Python 2; the message text is unchanged.
        raise ValueError(
            "All samples have been filtered out for the criteria"
            " described in the valid states"
        )

    return sample_ids