Esempio n. 1
0
 def test_parse_metadata_state_descriptions(self):
     """Check parsing of an empty string and of a two-category description."""
     # An empty description is expected to give an empty mapping.
     self.assertEqual(parse_metadata_state_descriptions(''), {})

     # Each category name is expected to map to the set of its listed values.
     description = 'Study:Twin,Hand,Dog;BodySite:Palm,Stool'
     expected = {
         'Study': set(['Twin', 'Hand', 'Dog']),
         'BodySite': set(['Palm', 'Stool']),
     }
     self.assertEqual(parse_metadata_state_descriptions(description), expected)
def get_sam_ids(map_data, map_header, colorby, cat, primary_state, secondary_state):
    """Return the sample ids matching the primary and secondary state strings.

    map_data: mapping-file rows; the first element of each row is used as
        the sample id
    map_header: headers for map_data, passed through to get_sample_ids
    colorby: category name, e.g. 'Country', or None to skip the
        colorby:cat restriction
    cat: value required for colorby, e.g. 'USA' (unused when colorby is None)
    primary_state: state description, e.g. 'AgeCategory:Child'
    secondary_state: None, or a state description like primary_state

    When secondary_state is None, the second list holds every candidate
    sample (all samples, or only those matching colorby:cat) that is not
    in the first list.

    Returns a pair of uniquified lists (primary ids, secondary ids). Their
    order is unspecified because they are built from sets.
    """
    # Compare against the None singleton by identity: `== None` can be
    # hijacked by objects that overload __eq__.
    if colorby is None:
        sample_ids = [sam[0] for sam in map_data]
    else:
        sample_ids = get_sample_ids(map_data, map_header, {colorby: [cat]})

    # Keys of the parsed states are category labels (e.g. AgeCategory);
    # values are the accepted values for that category (e.g. Adult).
    primary_states = parse_metadata_state_descriptions(primary_state)
    if colorby is not None:
        # Restrict the primary selection to the colorby:cat samples.
        primary_states[colorby] = [cat]
    state1_samids = get_sample_ids(map_data, map_header, primary_states)

    if secondary_state is None:
        # No explicit secondary state: take the complement of the primary
        # selection within the candidate samples.
        state2_samids = set(sample_ids).difference(set(state1_samids))
    else:
        secondary_states = parse_metadata_state_descriptions(secondary_state)
        if colorby is not None:
            secondary_states[colorby] = [cat]
        state2_samids = get_sample_ids(map_data, map_header, secondary_states)

    return list(set(state1_samids)), list(set(state2_samids))
def get_sam_ids(map_data, map_header, colorby, cat, primary_state,
                secondary_state):
    """Collect the sample ids selected by the primary and secondary states.

    colorby: category name such as 'Country'; None disables the
        colorby:cat restriction
    cat: value for colorby, such as 'USA'
    primary_state: state description such as 'AgeCategory:Child'
    secondary_state: None, or a description in the same form as
        primary_state

    Returns two uniquified lists (primary ids, secondary ids) in no
    particular order.
    """
    filter_by_cat = colorby is not None

    # Candidate pool: samples matching colorby:cat, or every sample id
    # (first element of each mapping row) when no colorby is given.
    if filter_by_cat:
        candidates = get_sample_ids(map_data, map_header, {colorby: [cat]})
    else:
        candidates = [row[0] for row in map_data]

    # Parsed states map a category label to its accepted values.
    first_states = parse_metadata_state_descriptions(primary_state)
    if filter_by_cat:
        first_states[colorby] = [cat]
    first_ids = get_sample_ids(map_data, map_header, first_states)

    if secondary_state is not None:
        second_states = parse_metadata_state_descriptions(secondary_state)
        if filter_by_cat:
            second_states[colorby] = [cat]
        second_ids = get_sample_ids(map_data, map_header, second_states)
    else:
        # Without a secondary state, use the candidates left over after
        # removing the primary selection.
        second_ids = set(candidates) - set(first_ids)

    return list(set(first_ids)), list(set(second_ids))
Esempio n. 4
0
 def test_parse_metadata_state_descriptions(self):
     """Each description should parse to the expected category -> values mapping."""
     # (description, expected parse result) pairs, checked in order.
     cases = [
         ('', {}),
         ('Study:Twin,Hand,Dog;BodySite:Palm,Stool',
          {'Study': set(['Twin', 'Hand', 'Dog']),
           'BodySite': set(['Palm', 'Stool'])}),
     ]
     for description, expected in cases:
         self.assertEqual(
             parse_metadata_state_descriptions(description), expected)
 def test_get_sample_ids(self):
     """get_sample_ids should return the sample ids selected by each description."""
     # (state description, expected sample ids) pairs, evaluated in order
     # against the fixture's map_data / map_headers.
     expectations = (
         ('Study:Twin', []),
         ('Study:Dog', ['a', 'b']),
         ('Study:*,!Dog', ['c', 'd', 'e']),
         ('Study:*,!Dog;BodySite:Stool', ['e']),
         ('BodySite:Stool', ['a', 'b', 'e']),
     )
     for description, expected_ids in expectations:
         states = parse_metadata_state_descriptions(description)
         observed = get_sample_ids(self.map_data, self.map_headers, states)
         self.assertEqual(observed, expected_ids)
Esempio n. 6
0
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Return the sample ids in a mapping file that match a metadata description.

    mapping_f: mapping file input handed to parse_mapping_file
    valid_states_str: description handed to parse_metadata_state_descriptions
    """
    # parse_mapping_file yields (data, header, comments); the comments are
    # not needed here.
    rows, header, _ = parse_mapping_file(mapping_f)
    return get_sample_ids(
        rows, header, parse_metadata_state_descriptions(valid_states_str))
def filter_otus_and_map(map_infile, otu_infile, map_outfile, otu_outfile,
    valid_states_str, num_seqs_per_otu):
    """Filter a mapping file and an OTU table to the samples matching valid_states_str.

    map_infile: open mapping file; it is closed once parsed
    otu_infile: iterable of OTU table lines
    map_outfile: writable object receiving the filtered mapping file
    otu_outfile: writable object receiving the filtered OTU table
    valid_states_str: state description selecting the samples to keep
    num_seqs_per_otu: passed to filter_line as min_count for data lines

    Each output object is closed after writing unless it is a StringIO
    instance.

    Raises ValueError if a data line appears before the '#OTU ID' header
    line, since the columns to keep are not known at that point.
    """
    map_data, map_header, map_comments = parse_mapping_file(map_infile)
    map_infile.close()
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    # write out the filtered mapping file
    out_headers, out_data = filter_map(map_data, map_header, sample_ids)
    header_line = '#' + '\t'.join(out_headers)
    # Build a real list: concatenating a list with the result of map()
    # fails wherever map() returns an iterator.
    map_lines = [header_line] + ['\t'.join(row) for row in out_data]
    map_outfile.write('\n'.join(map_lines))
    if not isinstance(map_outfile, StringIO):
        map_outfile.close()

    # write out the filtered OTU file
    header_seen = False
    cols = None
    for line in otu_infile:
        if line.startswith('#OTU ID'):
            # The header line determines which columns are kept.
            cols = find_good_cols(line, sample_ids)
            header_seen = True
            filter_line(line, cols, min_count=None, outfile=otu_outfile)
        elif line.startswith('#'):
            # Other comment lines are copied through unchanged.
            otu_outfile.write(line)
        else:
            if not header_seen:
                raise ValueError(
                    "OTU data line found before the '#OTU ID' header line")
            filter_line(line, cols, min_count=num_seqs_per_otu,
                outfile=otu_outfile)
    if not isinstance(otu_outfile, StringIO):
        otu_outfile.close()
Esempio n. 8
0
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Look up the sample ids selected by a metadata state description.

    The mapping file is parsed with parse_mapping_file, the description
    with parse_metadata_state_descriptions, and both are handed to
    get_sample_ids, whose result is returned unchanged.
    """
    # Only the data rows and headers are used; the comments are discarded.
    data, headers, _comments = parse_mapping_file(mapping_f)
    wanted_states = parse_metadata_state_descriptions(valid_states_str)
    return get_sample_ids(data, headers, wanted_states)
Esempio n. 9
0
def filter_otus_and_map(map_infile, otu_infile, map_outfile, otu_outfile,
    valid_states_str, num_seqs_per_otu):
    """Filter a mapping file and an OTU table to the samples matching valid_states_str.

    map_infile: open mapping file; it is closed once parsed
    otu_infile: iterable of OTU table lines
    map_outfile: writable object receiving the filtered mapping file
    otu_outfile: writable object receiving the filtered OTU table
    valid_states_str: state description selecting the samples to keep
    num_seqs_per_otu: passed to filter_line as min_count for data lines

    Each output object is closed after writing unless it is a StringIO
    instance.

    Raises ValueError if a data line appears before the '#OTU ID' header
    line, since the columns to keep are not known at that point.
    """
    map_data, map_header, map_comments = parse_mapping_file(map_infile)
    map_infile.close()
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    # write out the filtered mapping file
    out_headers, out_data = filter_map(map_data, map_header, sample_ids)
    header_line = '#' + '\t'.join(out_headers)
    # Build a real list: concatenating a list with the result of map()
    # fails wherever map() returns an iterator.
    map_lines = [header_line] + ['\t'.join(row) for row in out_data]
    map_outfile.write('\n'.join(map_lines))
    if not isinstance(map_outfile, StringIO):
        map_outfile.close()

    # write out the filtered OTU file
    header_seen = False
    cols = None
    for line in otu_infile:
        if line.startswith('#OTU ID'):
            # The header line determines which columns are kept.
            cols = find_good_cols(line, sample_ids)
            header_seen = True
            filter_line(line, cols, min_count=None, outfile=otu_outfile)
        elif line.startswith('#'):
            # Other comment lines are copied through unchanged.
            otu_outfile.write(line)
        else:
            if not header_seen:
                raise ValueError(
                    "OTU data line found before the '#OTU ID' header line")
            filter_line(line, cols, min_count=num_seqs_per_otu,
                outfile=otu_outfile)
    if not isinstance(otu_outfile, StringIO):
        otu_outfile.close()
Esempio n. 10
0
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Return the sample ids matching a metadata description.

    mapping_f: mapping file input handed to parse_mapping_file
    valid_states_str: description handed to parse_metadata_state_descriptions

    Raises ValueError when no sample id matches the description.
    """
    # The mapping-file comments are not used.
    rows, header, _ = parse_mapping_file(mapping_f)
    states = parse_metadata_state_descriptions(valid_states_str)
    matching = get_sample_ids(rows, header, states)

    if len(matching) >= 1:
        return matching

    # An empty selection is treated as an error rather than returned.
    raise ValueError("All samples have been filtered out for the criteria "
                     "described in the valid states")
Esempio n. 11
0
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Given a description of metadata, return the corresponding sample ids.

    mapping_f: mapping file input handed to parse_mapping_file
    valid_states_str: description handed to parse_metadata_state_descriptions

    Raises ValueError when no sample id matches the description.
    """
    map_data, map_header, map_comments = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    sample_ids = get_sample_ids(map_data, map_header, valid_states)

    if len(sample_ids) < 1:
        # Use the call form of raise: the `raise Exc, "msg"` statement form
        # is a syntax error on Python 3.
        raise ValueError("All samples have been filtered out for the criteria"
                         " described in the valid states")

    return sample_ids