Beispiel #1
0
def simsam_range(table, tree, simulated_sample_sizes, dissimilarities, mapping_f=None):
    """Run sim_otu_table for every sample size / dissimilarity combination

     table: the input table that samples are simulated from
     tree: tree relating the OTUs in the input table
     simulated_sample_sizes: a list of ints, each giving how many
      output samples should be created per input sample
     dissimilarities: a list of floats, each a dissimilarity to use
      when simulating a table
     mapping_f: file handle for a metadata mapping file; pass it when
      a mapping file should be produced for the samples of each
      simulated table

     This is a generator. Each item it yields is a tuple:
      (output table, output mapping lines, simulated_sample_size, dissimilarity)

     When mapping_f is not provided, the second element is None:
      (output table, None, simulated_sample_size, dissimilarity)

    """
    if mapping_f is None:
        # no mapping file was supplied: install a stand-in that always
        # returns None, so the loop below never has to test whether a
        # mapping file exists
        mapping_lines = None

        def process_map(_lines, _sample_size, _sample_ids):
            return None

    else:
        # read the mapping file once up front; the same lines are reused
        # for every simulated sample size
        mapping_lines = list(mapping_f)
        process_map = create_replicated_mapping_file

    for sample_size in simulated_sample_sizes:
        # mapping data depends only on the sample size, so it is built once
        # per outer iteration and shared across all dissimilarities
        replicated_map = process_map(mapping_lines, sample_size, table.ids())
        for dissim in dissimilarities:
            # simulate the OTU table contents for this parameter pair
            simulated = sim_otu_table(
                table.ids(),
                table.ids(axis="observation").tolist(),
                table.iter(),
                table.metadata(axis="observation"),
                tree,
                sample_size,
                dissim,
            )
            sim_sample_ids, sim_otu_ids, sim_data, sim_metadata = simulated
            sim_table = Table(
                sim_data,
                sim_otu_ids,
                sim_sample_ids,
                observation_metadata=sim_metadata,
                generated_by=get_generated_by_for_biom_tables(),
                create_date=datetime.now().isoformat(),
            )
            yield (sim_table, replicated_map, sample_size, dissim)
Beispiel #2
0
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None, seq_counts=None):
    """Generate a BIOM table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column contains
        the OTU ID and subsequent columns contain sequence IDs belonging to
        that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, the dict maps OTU IDs to taxonomies.
        OTU IDs absent from the dict get the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter that is used in the sequence IDs to join
        the sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier that will be given to the
        generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to the
        OTU table from the OTU map
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. If supplied, keys in the outer dict should be
        sample IDs, and keys in the inner dicts should be column names.
    seq_counts : dict, optional
        Defaults to ``None``. If supplied, the dict maps seq ids to seq counts

    Returns
    -------
    Table
        The table built from the parsed OTU map, with the taxonomy attached
        as observation metadata and ``sample_metadata`` (reordered to match
        the OTU map's sample IDs) attached as sample metadata.

    Raises
    ------
    KeyError
        If ``sample_metadata`` is supplied but lacks an entry for a sample ID
        found in the OTU map. The message names the first missing sample ID.
    ValueError
        If the ``Table`` constructor raises ``ValueError``; the original
        message is appended to the re-raised error.

    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f, delim=delim, otu_ids_to_exclude=otu_ids_to_exclude,
        seq_counts=seq_counts)

    if otu_to_taxonomy is not None:
        # one metadata dict per OTU, in the same order as otu_ids
        otu_metadata = [{'taxonomy': otu_to_taxonomy.get(o, ["None"])}
                        for o in otu_ids]
    else:
        otu_metadata = None

    # if sample_metadata is supplied, put in index-order with the OTU map's
    # sample_ids, and do not include samples that were in the mapping file
    # but NOT in the OTU map
    if sample_metadata is not None:
        try:
            sample_metadata = [sample_metadata[sample_id]
                               for sample_id in sample_ids]
        except KeyError as e:
            # include the offending key so the caller can tell which sample
            # is missing from the metadata
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata: %s" % (e,))

    try:
        return Table(data, otu_ids, sample_ids,
                     observation_metadata=otu_metadata,
                     sample_metadata=sample_metadata, table_id=table_id,
                     generated_by=get_generated_by_for_biom_tables(),
                     create_date=datetime.now().isoformat())
    except ValueError as e:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(e)))
Beispiel #3
0
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None):
    """Build a BIOM table from the contents of an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map: a jagged tab-separated file whose first column holds the
        OTU ID and whose remaining columns hold the sequence IDs assigned to
        that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. When given, maps OTU IDs to taxonomies; OTU IDs
        missing from the dict receive the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". Delimiter joining the sample ID to the sequence
        number inside each sequence ID
    table_id : object, optional
        Defaults to ``None``. Identifier assigned to the generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. OTUs listed here are left out of the OTU table
    sample_metadata : iterable of dicts, optional
        Defaults to ``None``. Handed to the ``Table`` constructor unchanged.

    Raises
    ------
    ValueError
        If the ``Table`` constructor raises ``ValueError``; the original
        message is appended to the re-raised error.
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f, delim=delim, otu_ids_to_exclude=otu_ids_to_exclude)

    # observation metadata is only built when a taxonomy lookup was supplied
    otu_metadata = None
    if otu_to_taxonomy is not None:
        otu_metadata = [{'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
                        for otu_id in otu_ids]

    # sample_metadata is forwarded as-is; no reordering or validation is done
    try:
        otu_table = Table(data, otu_ids, sample_ids,
                          observation_metadata=otu_metadata,
                          sample_metadata=sample_metadata,
                          table_id=table_id,
                          generated_by=get_generated_by_for_biom_tables(),
                          create_date=datetime.now().isoformat())
    except ValueError as e:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % str(e))
    return otu_table
Beispiel #4
0
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   otu_ids_to_exclude=None,
                   sample_metadata=None):
    """Build a BIOM table from the contents of an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map: a jagged tab-separated file whose first column holds the
        OTU ID and whose remaining columns hold the sequence IDs assigned to
        that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. When given, maps OTU IDs to taxonomies; OTU IDs
        missing from the dict receive the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". Delimiter joining the sample ID to the sequence
        number inside each sequence ID
    table_id : object, optional
        Defaults to ``None``. Identifier assigned to the generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. OTUs listed here are left out of the OTU table
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. When given, the outer keys should be sample IDs
        and the inner keys should be column names.

    Raises
    ------
    KeyError
        If ``sample_metadata`` is given but has no entry for a sample ID
        present in the OTU map.
    ValueError
        If the ``Table`` constructor raises ``ValueError``; the original
        message is appended to the re-raised error.
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f, delim=delim, otu_ids_to_exclude=otu_ids_to_exclude)

    # observation metadata is only built when a taxonomy lookup was supplied
    otu_metadata = None
    if otu_to_taxonomy is not None:
        otu_metadata = [{'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
                        for otu_id in otu_ids]

    # when sample metadata is supplied, arrange it in the same order as the
    # OTU map's sample IDs; entries for samples that appear only in the
    # mapping file are dropped because they are never looked up
    if sample_metadata is not None:
        ordered_metadata = []
        try:
            for sid in sample_ids:
                ordered_metadata.append(sample_metadata[sid])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = ordered_metadata

    try:
        otu_table = Table(data,
                          otu_ids,
                          sample_ids,
                          observation_metadata=otu_metadata,
                          sample_metadata=sample_metadata,
                          table_id=table_id,
                          generated_by=get_generated_by_for_biom_tables(),
                          create_date=datetime.now().isoformat())
    except ValueError as e:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % str(e))
    return otu_table
Beispiel #5
0
def simsam_range(table,
                 tree,
                 simulated_sample_sizes,
                 dissimilarities,
                 mapping_f=None):
    """Yield one simulated table per sample size / dissimilarity pair

     table: the input table that samples are simulated from
     tree: tree relating the OTUs in the input table
     simulated_sample_sizes: a list of ints, each giving how many
      output samples should be created per input sample
     dissimilarities: a list of floats, each a dissimilarity to use
      when simulating a table
     mapping_f: file handle for a metadata mapping file; pass it when
      a mapping file should be produced for the samples of each
      simulated table

     Each yielded item is a tuple of the form:
      (output table, output mapping lines, simulated_sample_size, dissimilarity)

     Without mapping_f the mapping lines element is None:
      (output table, None, simulated_sample_size, dissimilarity)

    """
    # load the mapping file (when present) a single time so it can be
    # reused for every simulated sample size
    mapping_lines = None if mapping_f is None else list(mapping_f)

    if mapping_lines is not None:
        process_map = create_replicated_mapping_file
    else:
        # placeholder with the same call shape, so the loop body does not
        # need to branch on whether a mapping file was given
        def process_map(_lines, _sample_size, _sample_ids):
            return None

    for n_simulated in simulated_sample_sizes:
        # mapping output depends only on the sample size
        mapping_out = process_map(mapping_lines, n_simulated, table.ids())

        for dissim in dissimilarities:
            # simulate the OTU table contents for this parameter pair
            (new_sample_ids,
             new_otu_ids,
             new_data,
             new_metadata) = sim_otu_table(
                table.ids(),
                table.ids(axis='observation').tolist(),
                table.iter(),
                table.metadata(axis='observation'),
                tree,
                n_simulated,
                dissim)

            new_table = Table(new_data,
                              new_otu_ids,
                              new_sample_ids,
                              observation_metadata=new_metadata,
                              generated_by=get_generated_by_for_biom_tables(),
                              create_date=datetime.now().isoformat())

            yield (new_table, mapping_out, n_simulated, dissim)