def get_sam_ids(map_data, map_header, colorby, cat, primary_state,
                secondary_state):
    """Return two lists of sample ids selected by metadata state strings.

    map_data, map_header: mapping data and header, passed straight through
        to get_sample_ids
    colorby: metadata category to restrict to, e.g. 'Country'; pass None
        to consider every sample in map_data
    cat: the value of colorby to keep, e.g. 'USA' (unused if colorby is
        None)
    primary_state: state description, e.g. 'AgeCategory:Child'
    secondary_state: None, or a state description like primary_state

    Returns (primary_ids, secondary_ids). Each list is uniquified and its
    order is arbitrary. When secondary_state is None, secondary_ids holds
    the candidate samples that did not match primary_state.
    """
    restrict_to_cat = colorby is not None

    # Candidate pool: either every sample, or only the colorby:cat samples.
    if restrict_to_cat:
        candidate_ids = get_sample_ids(map_data, map_header, {colorby: [cat]})
    else:
        candidate_ids = [row[0] for row in map_data]

    # Keys are category labels (e.g. AgeCategory), values are the accepted
    # values for that category (e.g. Adult).
    first_filter = parse_metadata_state_descriptions(primary_state)
    if restrict_to_cat:
        first_filter[colorby] = [cat]
    first_ids = get_sample_ids(map_data, map_header, first_filter)

    if secondary_state is not None:
        second_filter = parse_metadata_state_descriptions(secondary_state)
        if restrict_to_cat:
            second_filter[colorby] = [cat]
        second_ids = get_sample_ids(map_data, map_header, second_filter)
    else:
        # No explicit second state: take everything left in the pool.
        second_ids = set(candidate_ids) - set(first_ids)

    unique_first = list(set(first_ids))
    unique_second = list(set(second_ids))
    return unique_first, unique_second
def get_sam_ids(map_data, map_header, colorby, cat, primary_state, secondary_state):
    """Return the sample ids matching each state string and colorby:cat.

    map_data, map_header: mapping data and header, forwarded unchanged to
        get_sample_ids
    colorby: metadata category used to restrict the samples, e.g.
        'Country'; pass None to skip the colorby:cat restriction
    cat: value of the colorby category to keep, e.g. 'USA'
    primary_state: e.g. 'AgeCategory:Child'
    secondary_state: None, or a description in the same form as
        primary_state

    Returns a tuple (state1_samids, state2_samids) of uniquified lists in
    arbitrary order. If secondary_state is None, the second list contains
    the samples (within colorby:cat, when given) that did not match
    primary_state.
    """
    # None is a singleton: compare by identity, not with ==/!=, so a value
    # with a custom __eq__ can never be mistaken for "no filter".
    if colorby is None:
        sample_ids = [sam[0] for sam in map_data]
    else:
        sample_ids = get_sample_ids(map_data, map_header, {colorby: [cat]})

    # primary key is the category label, e.g. AgeCategory
    # value is the val for that category, e.g. Adult
    primary_states = parse_metadata_state_descriptions(primary_state)
    if colorby is not None:
        primary_states[colorby] = [cat]
    state1_samids = get_sample_ids(map_data, map_header, primary_states)

    if secondary_state is None:
        # No second state given: use the complement of the primary matches.
        state2_samids = set(sample_ids).difference(set(state1_samids))
    else:
        secondary_states = parse_metadata_state_descriptions(secondary_state)
        if colorby is not None:
            secondary_states[colorby] = [cat]
        state2_samids = get_sample_ids(map_data, map_header, secondary_states)

    return list(set(state1_samids)), list(set(state2_samids))
def main():
    """Write the sequences of the selected samples to a new FASTA file.

    Options are read via parse_command_line_parameters(**script_info).
    Without a mapping file, opts.sample_ids is a comma-separated list of
    sample ids. With a mapping file, opts.sample_ids is a metadata state
    description that is resolved to sample ids through get_sample_ids.

    Raises ValueError if a mapping file is given and no sample matches the
    description. An unreadable input or unwritable output path is reported
    through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    sample_ids = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_ids.split(',')
    else:
        # Keep the raw query string: sample_ids is rebound to the matching
        # ids below, and the error message must show what was searched for.
        search_criteria = sample_ids
        map_data, map_header, map_comments = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data,
            map_header,
            parse_metadata_state_descriptions(search_criteria))
        if len(sample_ids) == 0:
            raise ValueError(
                "No samples match the search criteria: %s" %
                search_criteria)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        # Single parenthesised argument: valid as both the Python 2 print
        # statement and the Python 3 print function.
        print("Extracting samples: %s" % ', '.join(sample_ids))

    try:
        input_fasta_f = open(input_fasta_fp)
    except IOError:
        option_parser.error(
            'Cannot open %s. Does it exist? Do you have read access?' %
            input_fasta_fp)
        # Fallback in case error() returns instead of exiting.
        exit(1)

    # The with blocks guarantee both handles are closed, even if
    # extraction or writing fails part-way through.
    with input_fasta_f:
        try:
            output_fasta_f = open(output_fasta_fp, 'w')
        except IOError:
            option_parser.error(
                "Cannot open %s. Does path exist? Do you have write access?" %
                output_fasta_fp)
            exit(1)

        with output_fasta_f:
            seqs = parse_fasta(input_fasta_f)
            for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
                output_fasta_f.write('>%s\n%s\n' % r)
def main():
    """Extract the sequences of the requested samples into a FASTA file.

    Command-line options come from
    parse_command_line_parameters(**script_info). If no mapping file is
    supplied, opts.sample_ids is split on commas and used directly;
    otherwise it is parsed as a metadata state description and resolved
    against the mapping file with get_sample_ids.

    Raises ValueError when the mapping-file query matches no samples.
    Failure to open the input or output file is reported via
    option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    sample_ids = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_ids.split(',')
    else:
        # Remember the query text before sample_ids is overwritten with
        # the resolved ids, so a failed search can be reported accurately.
        query = sample_ids
        map_data, map_header, map_comments = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data,
            map_header,
            parse_metadata_state_descriptions(query))
        if len(sample_ids) == 0:
            raise ValueError(
                "No samples match the search criteria: %s" % query)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        # Written with one parenthesised argument so it parses under both
        # Python 2 and Python 3.
        print("Extracting samples: %s" % ', '.join(sample_ids))

    try:
        input_fasta_f = open(input_fasta_fp)
    except IOError:
        option_parser.error(
            'Cannot open %s. Does it exist? Do you have read access?' %
            input_fasta_fp)
        # Only reached if error() does not terminate the process itself.
        exit(1)

    # Context managers close both files on every exit path.
    with input_fasta_f:
        try:
            output_fasta_f = open(output_fasta_fp, 'w')
        except IOError:
            option_parser.error(
                "Cannot open %s. Does path exist? Do you have write access?" %
                output_fasta_fp)
            exit(1)

        with output_fasta_f:
            seqs = parse_fasta(input_fasta_f)
            for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
                output_fasta_f.write('>%s\n%s\n' % r)