Python filter_samples_from_otu_table 예제들, qiime.filter.filter_samples_from_otu_table Python 예제들

예제 #1

0

파일 보기

파일: test_filter.py 프로젝트: Ecogenomics/FrankenQIIME

 def test_filter_samples_from_otu_table(self):
     """Filtering by a list of sample IDs yields the expected tables."""
     source_table = self.input_otu_table1

     # Two sample IDs are passed as the IDs to keep.
     two_sample_result = filter_samples_from_otu_table(
         source_table, ["DEF", "GHI tfasd"])
     self.assertEqual(two_sample_result, self.expected_otu_table1c)

     # The order of the OTU table is retained regardless of the order of
     # the sample IDs that were asked for.
     one_sample_result = filter_samples_from_otu_table(
         source_table, ["XYZ"])
     self.assertEqual(one_sample_result, self.expected_otu_table1d)

예제 #2

0

파일 보기

파일: rarefaction.py 프로젝트: rob-knight/qiime

def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Rarefy an OTU table to ``seqs_per_sample`` sequences per sample.

    - include_small_samples=False => samples with fewer than
      seqs_per_sample total sequences are not written to the result
    - otu_table (input and output) is OTUs (rows) by samples (columns)
    - no OTUs are removed, even if they are absent in the rarefied table
    - subsample_f is called as subsample_f(sample_vector, seqs_per_sample)
      for every sample that is not below seqs_per_sample
    """
    if not include_small_samples:
        # Drop the small samples up front, using seqs_per_sample as the
        # minimum count and no upper bound.
        otu_table = filter_samples_from_otu_table(
            otu_table, otu_table.SampleIds, seqs_per_sample, inf)

    def _rarefy_sample(counts, sample_id, sample_md):
        # Samples below the requested depth pass through unchanged; all
        # others are subsampled down to seqs_per_sample.
        if counts.sum() < seqs_per_sample:
            return counts
        return subsample_f(counts, seqs_per_sample)

    return otu_table.transformSamples(_rarefy_sample)

예제 #3

0

파일 보기

파일: simulate.py 프로젝트: gregcaporaso/microbiogeo

def choose_cluster_subsets(otu_table_f, map_f, category, num_total_samples):
    """Pick a subset of samples spread over the values of ``category``.

    Parameters:
    - otu_table_f: BIOM table source handed to parse_biom_table
    - map_f: mapping file source; it is read twice (once for the metadata
      map and once for the filtered mapping file), so it is rewound in
      between when it supports seek()
    - category: mapping file column used to group the samples
    - num_total_samples: number of samples the subset must contain

    Returns a 2-tuple: (OTU table filtered to the chosen samples, result of
    filter_mapping_file_from_mapping_f for the same samples).

    Raises InvalidSubsetSize if num_total_samples exceeds the number of
    samples in the OTU table.
    """
    otu_table = parse_biom_table(otu_table_f)
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    # map_f is consumed again at the end of this function, so rewind it.
    # Objects without seek() raise AttributeError and are left untouched.
    try:
        map_f.seek(0)
    except AttributeError:
        pass

    otu_sample_ids = otu_table.SampleIds
    if num_total_samples > len(otu_sample_ids):
        raise InvalidSubsetSize("Too many total samples (%d) were specified "
                                "as a subset size. There are only %d total "
                                "samples to choose a subset from." %
                                (num_total_samples, len(otu_sample_ids)))

    # Build the lookup once: testing membership against the raw ID sequence
    # inside the loop would rescan it for every mapping file sample.
    otu_sample_id_set = set(otu_sample_ids)

    category_map = defaultdict(list)
    for samp_id in metadata_map.SampleIds:
        # Mapping files can have more samples than OTU tables.
        if samp_id in otu_sample_id_set:
            category_val = metadata_map.getCategoryValue(samp_id, category)
            category_map[category_val].append(samp_id)

    samp_ids_to_keep, extra_samps = _choose_items_from_clusters(
            category_map, otu_sample_ids, num_total_samples)
    samp_ids_to_keep.extend(extra_samps)

    # Internal sanity checks on the helper's output.
    assert len(samp_ids_to_keep) == num_total_samples, \
           "%d != %d" % (len(samp_ids_to_keep), num_total_samples)
    assert len(samp_ids_to_keep) == len(set(samp_ids_to_keep)), \
           "Duplicate sample IDs in subset"

    return (filter_samples_from_otu_table(otu_table, samp_ids_to_keep, 0, inf),
            filter_mapping_file_from_mapping_f(map_f, samp_ids_to_keep))

예제 #4

0

파일 보기

파일: simulate.py 프로젝트: gregcaporaso/microbiogeo

def choose_gradient_subset(otu_table_f, map_f, category, num_total_samples):
    """Pick a subset of samples spread along a numeric gradient category.

    Parameters:
    - otu_table_f: BIOM table source handed to parse_biom_table
    - map_f: mapping file source; it is read twice, so it is rewound in
      between when it supports seek()
    - category: mapping file column whose values are converted with float()
      and used to sort the samples
    - num_total_samples: number of samples the subset must contain

    Returns a 2-tuple: (OTU table filtered to the chosen samples, result of
    filter_mapping_file_from_mapping_f for the same samples).

    Raises InvalidSubsetSize if num_total_samples exceeds the number of
    samples in the OTU table.
    """
    otu_table = parse_biom_table(otu_table_f)
    mdm, _ = parse_mapping_file_to_dict(map_f)

    # map_f is consumed again at the end of this function, so rewind it.
    # Objects without seek() raise AttributeError and are left untouched.
    try:
        map_f.seek(0)
    except AttributeError:
        pass

    otu_sample_ids = otu_table.SampleIds
    if num_total_samples > len(otu_sample_ids):
        raise InvalidSubsetSize("Too many total samples (%d) were specified "
                                "as a gradient subset size. There are only %d "
                                "total samples to choose a subset from." %
                                (num_total_samples, len(otu_sample_ids)))

    # Build the lookup once: testing membership against the raw ID sequence
    # inside the comprehension would rescan it for every mapping file sample.
    otu_sample_id_set = set(otu_sample_ids)

    # Only keep the sample IDs that are in both the mapping file and OTU table.
    # Sort the samples according to the gradient category.
    samp_ids = [(samp_id, float(metadata[category]))
                for samp_id, metadata in mdm.items()
                if samp_id in otu_sample_id_set]
    samp_ids.sort(key=lambda samp_id: samp_id[1])

    samp_ids_to_keep = [samp_id[0] for samp_id in
                        _choose_items_from_bins(samp_ids, num_total_samples)]

    # Internal sanity checks on the helper's output.
    assert len(samp_ids_to_keep) == num_total_samples, \
           "%d != %d" % (len(samp_ids_to_keep), num_total_samples)
    assert len(samp_ids_to_keep) == len(set(samp_ids_to_keep)), \
           "Duplicate sample IDs in subset"

    return (filter_samples_from_otu_table(otu_table, samp_ids_to_keep, 0, inf),
            filter_mapping_file_from_mapping_f(map_f, samp_ids_to_keep))

예제 #5

0

파일 보기

파일: cospeciation.py 프로젝트: tanaes/codiversification

def reconcile_hosts_symbionts(otu_file, host_dist):
    """Trim a cOTU table and a host distance matrix against each other.

    Parameters:
    - otu_file: cOTU table passed to filter_samples_from_otu_table
    - host_dist: host distance matrix; its first element is used as the
      sample list for filtering (presumably the sample labels -- confirm
      against the caller)

    Returns a 2-tuple: (StringIO wrapping the re-formatted, filtered OTU
    table, filtered host distance matrix).
    """
    # Step 1: filter the cOTU table by the samples in the host tree/dm.
    # Per the original author's notes, the table afterwards only holds the
    # samples present in the host distance matrix.
    host_matched_table = filter_samples_from_otu_table(
        otu_file, host_dist[0], negate=True)

    # Step 2: parse the filtered table into its components.
    parsed_samples, parsed_taxa, parsed_data, parsed_lineages = \
        parse_otu_table(host_matched_table)

    # Step 3: filter a second time. The original author found that
    # skip_empty did not seem to work in format_otu_table when called from
    # filter_samples_from_otu_table, hence this explicit pass.
    sample_names, taxon_names, data, lineages = filter_otu_table_by_min(
        parsed_samples, parsed_taxa, parsed_data, parsed_lineages, min=1)

    # Step 4: filter the host distances to match the trimmed sample set.
    # NOTE: this relies on a modified filter_samples_from_distance_matrix
    # that returns a native dm tuple rather than a string.
    host_dist_filtered = filter_samples_from_distance_matrix(
        host_dist, sample_names, negate=True)

    # Step 5: serialise the surviving table and wrap it as a file-like.
    formatted_table = format_otu_table(
        sample_names, taxon_names, data, lineages)
    return StringIO(formatted_table), host_dist_filtered

예제 #6

0

파일 보기

def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Rarefy an OTU table to ``seqs_per_sample`` sequences per sample.

    - include_small_samples=False => samples with fewer than
      seqs_per_sample total sequences are not written to the result
    - otu_table (input and output) is OTUs (rows) by samples (columns)
    - no OTUs are removed, even if they are absent in the rarefied table
    - subsample_f is called as subsample_f(int_sample_vector,
      seqs_per_sample) for every sample that is not below seqs_per_sample
    """

    def _rarefy_sample(counts, sample_id, sample_md):
        # Samples below the requested depth pass through unchanged; all
        # others are cast to int and subsampled down to seqs_per_sample.
        if counts.sum() < seqs_per_sample:
            return counts
        return subsample_f(counts.astype(int), seqs_per_sample)

    # Both the filtering and the transform run with empty='raise' set.
    with errstate(empty='raise'):
        if not include_small_samples:
            # Drop the small samples up front, using seqs_per_sample as
            # the minimum count and no upper bound.
            otu_table = filter_samples_from_otu_table(
                otu_table, otu_table.ids(), seqs_per_sample, inf)

        return otu_table.transform(_rarefy_sample, axis='sample')

예제 #7

0

파일 보기

파일: filter_samples_from_otu_table.py 프로젝트: colinbrislawn/qiime

def main():
    """Filter samples from a BIOM table and optionally from its mapping file.

    Reads the command line options described by script_info, validates the
    option combination, filters the input table by sample ID and/or by
    min/max count, writes the filtered table to output_fp and, when
    output_mapping_fp is given, writes a mapping file restricted to the
    samples left in the filtered table.

    Invalid option combinations and an empty filtered table are reported
    through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    if mapping_fp is None and valid_states is not None:
        option_parser.error("--mapping_fp must be provided if --valid_states "
                            "is passed.")

    filtering_requested = ((mapping_fp and valid_states) or
                           min_count != 0 or
                           not isinf(max_count) or
                           sample_id_fp is not None)
    if not filtering_requested:
        option_parser.error(
            "No filtering requested. Must provide either "
            "mapping_fp and valid states, min counts, "
            "max counts, or sample_id_fp (or some combination "
            "of those)."
        )
    if (mapping_fp and valid_states) and sample_id_fp:
        option_parser.error("Providing both --sample_id_fp and "
                            "--mapping_fp/--valid_states is not supported.")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(input_fp)

    negate_sample_id_fp = opts.negate_sample_id_fp
    if mapping_fp and valid_states:
        # NOTE(review): assumes the helper reads the mapping file eagerly,
        # so the handle can be closed as soon as it returns.
        with open(mapping_fp, "U") as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_f, valid_states)
        # Negation only applies to IDs coming from --sample_id_fp.
        negate_sample_id_fp = False
    else:
        sample_ids_to_keep = otu_table.ids()

        if sample_id_fp is not None:
            sample_id_f_ids = set()
            with open(sample_id_fp, "U") as sample_id_f:
                for line in sample_id_f:
                    if line.startswith("#"):
                        continue
                    fields = line.strip().split()
                    # Blank lines have no fields; skip them instead of
                    # failing with IndexError on fields[0].
                    if fields:
                        sample_id_f_ids.add(fields[0])
            sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(
        otu_table, sample_ids_to_keep, min_count, max_count,
        negate_ids_to_keep=negate_sample_id_fp
    )

    try:
        write_biom_table(filtered_otu_table, output_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. "
            "This indicates that no samples remained after filtering."
        )

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, "U") as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data, mapping_headers, filtered_otu_table.ids())
        with open(output_mapping_fp, "w") as output_mapping_f:
            output_mapping_f.write(
                format_mapping_file(mapping_headers, mapping_data))

예제 #8

0

파일 보기

파일: filter_samples_from_otu_table.py 프로젝트: cmokeefe/qiime

def main():
    """Filter samples from a BIOM table and optionally from its mapping file.

    Reads the command line options described by script_info, validates the
    option combination, filters the input table by sample ID and/or by
    min/max count, writes the filtered table to output_fp and, when
    output_mapping_fp is given, writes a mapping file restricted to the
    samples left in the filtered table.

    Invalid option combinations are reported through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    filtering_requested = ((mapping_fp and valid_states) or
                           min_count != 0 or
                           not isinf(max_count) or
                           sample_id_fp is not None)
    if not filtering_requested:
        option_parser.error("No filtering requested. Must provide either "
                            "mapping_fp and valid states, min counts, "
                            "max counts, or sample_id_fp (or some combination "
                            "of those).")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(input_fp)

    if mapping_fp and valid_states:
        # NOTE(review): assumes the helper reads the mapping file eagerly,
        # so the handle can be closed as soon as it returns.
        with open(mapping_fp, 'U') as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_f, valid_states)
    else:
        sample_ids_to_keep = otu_table.ids()

    if sample_id_fp is not None:
        sample_id_f_ids = set()
        with open(sample_id_fp, 'U') as sample_id_f:
            for line in sample_id_f:
                if line.startswith('#'):
                    continue
                fields = line.strip().split()
                # Blank lines have no fields; skip them instead of failing
                # with IndexError on fields[0].
                if fields:
                    sample_id_f_ids.add(fields[0])
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(otu_table,
                                                       sample_ids_to_keep,
                                                       min_count,
                                                       max_count)
    write_biom_table(filtered_otu_table, output_fp)

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data, mapping_headers, filtered_otu_table.ids())
        with open(output_mapping_fp, 'w') as output_mapping_f:
            output_mapping_f.write(
                format_mapping_file(mapping_headers, mapping_data))

예제 #9

0

파일 보기

파일: filter_samples_from_otu_table.py 프로젝트: wilkox/qiime

def main():
    """Filter samples from a BIOM table and optionally from its mapping file.

    Reads the command line options described by script_info, validates the
    option combination, filters the input table by sample ID and/or by
    min/max count, writes the formatted filtered table to output_fp and,
    when output_mapping_fp is given, writes a mapping file restricted to
    the samples left in the filtered table.

    Invalid option combinations are reported through option_parser.error.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    filtering_requested = ((mapping_fp and valid_states) or
                           min_count != 0 or
                           not isinf(max_count) or
                           sample_id_fp is not None)
    if not filtering_requested:
        option_parser.error(
            "No filtering requested. Must provide either "
            "mapping_fp and valid states, min counts, "
            "max counts, or sample_id_fp (or some combination of those).")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    # NOTE(review): assumes parse_biom_table reads the handle eagerly, so
    # it can be closed as soon as the table is returned.
    with open(input_fp, 'U') as input_f:
        otu_table = parse_biom_table(input_f)

    if mapping_fp and valid_states:
        with open(mapping_fp, 'U') as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_f, valid_states)
    else:
        sample_ids_to_keep = otu_table.SampleIds

    if sample_id_fp is not None:
        sample_id_f_ids = set()
        with open(sample_id_fp, 'U') as sample_id_f:
            for line in sample_id_f:
                if line.startswith('#'):
                    continue
                fields = line.strip().split()
                # Blank lines have no fields; skip them instead of failing
                # with IndexError on fields[0].
                if fields:
                    sample_id_f_ids.add(fields[0])
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(otu_table,
                                                       sample_ids_to_keep,
                                                       min_count, max_count)

    # Open the output only once there is something to write, so a failure
    # in the steps above does not leave a truncated, empty output file.
    with open(output_fp, 'w') as output_f:
        output_f.write(format_biom_table(filtered_otu_table))

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data, mapping_headers, filtered_otu_table.SampleIds)
        with open(output_mapping_fp, 'w') as output_mapping_f:
            output_mapping_f.write(
                format_mapping_file(mapping_headers, mapping_data))

예제 #10

0

파일 보기

파일: make_otu_heatmap.py 프로젝트: Honglongwu/qiime

def get_order_from_categories(otu_table, category_labels):
    """Order samples by category value, clustering within each category.

    category_labels holds one label per sample, aligned with
    otu_table.ids(). Returns an array of sample indices: the samples of
    each unique label are emitted together, in the order get_clusters
    gives for that group's data.
    """
    labels = array(category_labels)
    ordered_indices = []

    for current_label in unique(labels):
        # Boolean mask of the samples carrying the current label.
        in_group = labels == current_label
        group_ids = [sample_id
                     for flag, sample_id in zip(in_group, otu_table.ids())
                     if flag]

        group_table = filter_samples_from_otu_table(
            otu_table, group_ids, -inf, inf)
        group_data = asarray(list(group_table.iter_data(axis="observation")))
        within_group_order = get_clusters(group_data, axis="column")

        # Translate the within-group order back to positions in the
        # full sample list.
        group_positions = nonzero(in_group)[0]
        ordered_indices.extend(group_positions[array(within_group_order)])

    return array(ordered_indices)

예제 #11

0

파일 보기

def get_order_from_categories(otu_table, category_labels):
    """Order samples by category value, clustering within each category.

    category_labels holds one label per sample, aligned with
    otu_table.ids(). Returns an array of sample indices: the samples of
    each unique label are emitted together, in the order get_clusters
    gives for that group's data.
    """
    labels = np.array(category_labels)
    ordered_indices = []

    for current_label in np.unique(labels):
        # Boolean mask of the samples carrying the current label.
        in_group = labels == current_label
        group_ids = [sample_id
                     for flag, sample_id in zip(in_group, otu_table.ids())
                     if flag]

        group_table = filter_samples_from_otu_table(
            otu_table, group_ids, -np.inf, np.inf)
        group_data = np.asarray(
            [row for row in group_table.iter_data(axis='observation')])
        within_group_order = get_clusters(group_data, axis='column')

        # Translate the within-group order back to positions in the
        # full sample list.
        group_positions = np.nonzero(in_group)[0]
        ordered_indices.extend(
            group_positions[np.array(within_group_order)])

    return np.array(ordered_indices)

예제 #12

0

파일 보기

파일: make_otu_heatmap.py 프로젝트: ElDeveloper/qiime

def get_order_from_categories(otu_table, category_labels):
    """Order samples by category value, clustering within each category.

    category_labels holds one label per sample, aligned with
    otu_table.ids(). Returns an array of sample indices: the samples of
    each unique label are emitted together, in the order get_clusters
    gives for that group's data.
    """
    labels = np.array(category_labels)
    ordered_indices = []

    for current_label in np.unique(labels):
        # Boolean mask of the samples carrying the current label.
        in_group = labels == current_label
        group_ids = [sample_id
                     for flag, sample_id in zip(in_group, otu_table.ids())
                     if flag]

        group_table = filter_samples_from_otu_table(
            otu_table, group_ids, -np.inf, np.inf)
        group_data = np.asarray(
            [row for row in group_table.iter_data(axis='observation')])
        within_group_order = get_clusters(group_data, axis='column')

        # Translate the within-group order back to positions in the
        # full sample list.
        group_positions = np.nonzero(in_group)[0]
        ordered_indices.extend(
            group_positions[np.array(within_group_order)])

    return np.array(ordered_indices)

예제 #13

0

파일 보기

파일: make_otu_heatmap.py 프로젝트: franny911/qiime

def get_order_from_categories(otu_table, category_labels):
    """Order samples by category value, clustering within each category.

    category_labels holds one label per sample, aligned with
    otu_table.SampleIds. Returns an array of sample indices: the samples
    of each unique label are emitted together, in the order get_clusters
    gives for that group's data.
    """
    labels = array(category_labels)
    ordered_indices = []

    for current_label in unique(labels):
        # Boolean mask of the samples carrying the current label.
        in_group = labels == current_label
        group_ids = [sample_id
                     for flag, sample_id in zip(in_group, otu_table.SampleIds)
                     if flag]

        group_table = filter_samples_from_otu_table(
            otu_table, group_ids, 0, inf)
        group_data = asarray(list(group_table.iterObservationData()))
        within_group_order = get_clusters(group_data, axis='column')

        # Translate the within-group order back to positions in the
        # full sample list.
        group_positions = nonzero(in_group)[0]
        ordered_indices.extend(group_positions[array(within_group_order)])

    return array(ordered_indices)

예제 #14

0

파일 보기

파일: make_otu_heatmap.py 프로젝트: franny911/qiime

def get_overlapping_samples(map_rows, otu_table):
    """Extracts only samples contained in otu table and mapping file.

    map_rows is a sequence of mapping file rows whose first element is the
    sample ID. Rows are kept in their original order.

    Returns: new_map_rows, new_otu_table
    """
    # Take the first column directly; zip(*map_rows)[0] fails on an empty
    # mapping and on Python 3, where zip() is not subscriptable.
    map_sample_ids = [row[0] for row in map_rows]
    shared_ids = set(map_sample_ids) & set(otu_table.SampleIds)

    otu_table = filter_samples_from_otu_table(otu_table, shared_ids, 0, inf)

    # Filter the rows themselves rather than looking each ID up with
    # .index(): that lookup is a linear scan per row and always returns the
    # first match, so a repeated sample ID would yield the wrong row.
    new_map = [row for row in map_rows if row[0] in shared_ids]

    return new_map, otu_table

예제 #15

0

파일 보기

파일: make_otu_heatmap.py 프로젝트: jrherr/qiime

def get_overlapping_samples(map_rows, otu_table):
    """Extracts only samples contained in otu table and mapping file.

    map_rows is a sequence of mapping file rows whose first element is the
    sample ID. Rows are kept in their original order.

    Returns: new_map_rows, new_otu_table
    """
    # Take the first column directly; zip(*map_rows)[0] fails on an empty
    # mapping and on Python 3, where zip() is not subscriptable.
    map_sample_ids = [row[0] for row in map_rows]
    shared_ids = set(map_sample_ids) & set(otu_table.sample_ids)

    otu_table = filter_samples_from_otu_table(otu_table, shared_ids, -inf, inf)

    # Filter the rows themselves rather than looking each ID up with
    # .index(): that lookup is a linear scan per row and always returns the
    # first match, so a repeated sample ID would yield the wrong row.
    new_map = [row for row in map_rows if row[0] in shared_ids]

    return new_map, otu_table

예제 #16

0

파일 보기

파일: test_filter.py 프로젝트: Ecogenomics/FrankenQIIME

 def test_filter_samples_from_otu_table_negate(self):
     """filter_samples_from_otu_table with negate=True gives the expected table."""
     listed_ids = ["ABC blah", "XYZ"]
     negated_result = filter_samples_from_otu_table(
         self.input_otu_table1, listed_ids, negate=True)
     self.assertEqual(negated_result, self.expected_otu_table1c)

예제 #17

0

파일 보기

파일: process_new_study.py 프로젝트: Jmaldo27/Evident

def main():
    """Prepare a new study: selectors, filtered table, beta and alpha runs.

    Steps, in order:
    1. Create the output directory (an existing one is only accepted when
       the force option is set).
    2. Parse the mapping file and BIOM table, restrict the mapping data to
       the samples in the table and check that the subject column exists.
    3. Write selectors.txt, mapping_file.txt and study_preferences.txt.
    4. Write a BIOM table holding only the samples with at least
       seqs_per_sample sequences, for the alpha rarefaction run.
    5. Load the QIIME parameters (from the parameter file or the built-in
       defaults) and settle the number of jobs to start.
    6. Copy the raw table and run the beta diversity and alpha rarefaction
       workflows.

    Problems with the options are reported through option_parser.error.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = int(opts.seqs_per_sample)
    parallel = opts.parallel
    min_seqs_sample = opts.min_seqs_sample
    subject_category = opts.subject_name

    try:
        makedirs(output_dir)
    except OSError:
        if not opts.force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    ## ******************** make_evident_selectors ********************
    ## The code for make_evident_selectors.py is here and has to go before the params
    ## validation as we need to know the main cats before creating the params file
    # NOTE(review): assumes both parsers read their handle eagerly, so the
    # files can be closed as soon as the parsers return.
    with open(mapping_fp, 'U') as mapping_f:
        map_data, headers, _ = parse_mapping_file(mapping_f)
    with open(otu_table_fp, 'U') as otu_table_f:
        biom_table = parse_biom_table(otu_table_f)

    # getting valid samples from biom file
    real_map_headers, real_map_data = filter_mapping_file(
        map_data, headers, biom_table.SampleIds, include_repeat_cols=False)

    if subject_category not in real_map_headers:
        option_parser.error('This column: %s is not in the mapping file, try %s' %
            (subject_category, real_map_headers))

    sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table)

    mapping_file_tuple = (real_map_data, real_map_headers)

    # calculate the available subjects at each rarefaction level
    results, main_map_cat = make_selectors(
        sorted_counts_per_sample, min_seqs_sample,
        mapping_file_tuple, subject_category, verbose=verbose)

    with open(join(output_dir, 'selectors.txt'), 'w') as fout:
        fout.write('#Sequences\tSubjects\tSamples\tMetadata\n')
        fout.write('\n'.join(results))

    with open(join(output_dir, 'mapping_file.txt'), 'w') as fout:
        fout.write(format_mapping_file(real_map_headers, real_map_data))
    ## ******************** make_evident_selectors ********************

    with open(join(output_dir, 'study_preferences.txt'), 'w') as fout:
        fout.write('%d\n' % seqs_per_sample)
        fout.write('%s\n' % subject_category)

    ## ******************** filter_samples_from_otu_table ********************
    ## Filtering original biom file to only have samples above the max length to avoid
    ## ugly plots
    alpha_biom_file = join(output_dir, 'filtered_otu_table_for_alpha.biom')
    sample_ids_to_keep = biom_table.SampleIds
    filtered_otu_table = filter_samples_from_otu_table(
        biom_table,
        sample_ids_to_keep,
        min_count=seqs_per_sample,
        max_count=inf)
    # The file is opened only after filtering succeeded, so a failure above
    # does not leave an empty table on disk.
    with open(alpha_biom_file, 'w') as fout:
        fout.write(format_biom_table(filtered_otu_table))
    ## ******************** filter_samples_from_otu_table ********************

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            option_parser.error("Can't open parameters file (%s). Does it exist? "
                "Do you have read access?" % opts.parameter_fp)
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters(
            ['beta_diversity:metrics unweighted_unifrac',
             'make_rarefaction_plots:prefs_path %s' % join(output_dir, 'prefs.txt'),
             'make_rarefaction_plots:colorby %s' % ','.join(main_map_cat),
             'make_rarefaction_plots:output_type memory',
             'multiple_rarefactions:min %d' % int(seqs_per_sample/4),
             'multiple_rarefactions:max %d' % (seqs_per_sample+1),
             'multiple_rarefactions:step %d' % int(seqs_per_sample/4),
             'multiple_rarefactions:num-reps 4',
            ])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    copyfile(otu_table_fp, join(output_dir, 'raw.biom'))

    run_beta_diversity_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        color_by_interesting_fields_only=False,
        sampling_depth=seqs_per_sample,
        histogram_categories=None,
        tree_fp=tree_fp,
        parallel=parallel,
        suppress_3d_plots=True,
        suppress_2d_plots=True,
        status_update_callback=status_update_callback)

    # The alpha rarefaction run works on the filtered table and writes to
    # its own sub-directory.
    output_dir = join(output_dir, 'alpha')
    run_alpha_rarefaction(
        otu_table_fp=alpha_biom_file,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        tree_fp=tree_fp,
        num_steps=4,
        parallel=parallel,
        min_rare_depth=10,
        max_rare_depth=20,
        status_update_callback=status_update_callback,
        plot_stderr_and_stddev=True)

예제 #18

0

파일 보기

def main():
    """Filter samples from a BIOM table and optionally from its mapping file.

    Reads the command line options described by script_info, validates the
    option combination, filters the input table by sample ID and/or by
    min/max count, writes the filtered table to output_fp and, when
    output_mapping_fp is given, writes a mapping file restricted to the
    samples left in the filtered table.

    Invalid option combinations and an empty filtered table are reported
    through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    if mapping_fp is None and valid_states is not None:
        option_parser.error("--mapping_fp must be provided if --valid_states "
                            "is passed.")

    filtering_requested = ((mapping_fp and valid_states) or
                           min_count != 0 or
                           not isinf(max_count) or
                           sample_id_fp is not None)
    if not filtering_requested:
        option_parser.error("No filtering requested. Must provide either "
                            "mapping_fp and valid states, min counts, "
                            "max counts, or sample_id_fp (or some combination "
                            "of those).")
    if (mapping_fp and valid_states) and sample_id_fp:
        option_parser.error("Providing both --sample_id_fp and "
                            "--mapping_fp/--valid_states is not supported.")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(input_fp)

    negate_sample_id_fp = opts.negate_sample_id_fp
    if mapping_fp and valid_states:
        # NOTE(review): assumes the helper reads the mapping file eagerly,
        # so the handle can be closed as soon as it returns.
        with open(mapping_fp, 'U') as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_f, valid_states)
        # Negation only applies to IDs coming from --sample_id_fp.
        negate_sample_id_fp = False
    else:
        sample_ids_to_keep = otu_table.ids()

        if sample_id_fp is not None:
            sample_id_f_ids = set()
            with open(sample_id_fp, 'U') as sample_id_f:
                for line in sample_id_f:
                    if line.startswith('#'):
                        continue
                    fields = line.strip().split()
                    # Blank lines have no fields; skip them instead of
                    # failing with IndexError on fields[0].
                    if fields:
                        sample_id_f_ids.add(fields[0])
            sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(
        otu_table,
        sample_ids_to_keep,
        min_count,
        max_count,
        negate_ids_to_keep=negate_sample_id_fp)

    try:
        write_biom_table(filtered_otu_table, output_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. "
            "This indicates that no samples remained after filtering.")

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data, mapping_headers, filtered_otu_table.ids())
        with open(output_mapping_fp, 'w') as output_mapping_f:
            output_mapping_f.write(
                format_mapping_file(mapping_headers, mapping_data))