Example #1
0
    def run(self, **kwargs):
        """Attach sample and/or observation metadata to a table.

        The following keys are read from ``kwargs``: ``table``,
        ``sample_metadata``, ``observation_metadata``, ``sc_separated``,
        ``sc_pipe_separated``, ``int_fields``, ``float_fields``,
        ``sample_header``, ``observation_header`` and ``output_as_json``.

        Returns a dict whose ``'table'`` entry is the pair
        ``(table, fmt)``; ``fmt`` is ``'json'`` when ``output_as_json`` is
        truthy and ``'hdf5'`` otherwise.

        Raises ``CommandError`` when neither metadata input is given.
        """
        table = kwargs['table']
        sample_md = kwargs['sample_metadata']
        obs_md = kwargs['observation_metadata']
        semicolon_fields = kwargs['sc_separated']
        pipe_fields = kwargs['sc_pipe_separated']
        int_fields = kwargs['int_fields']
        float_fields = kwargs['float_fields']
        sample_header = kwargs['sample_header']
        obs_header = kwargs['observation_header']
        if kwargs['output_as_json']:
            output_as = 'json'
        else:
            output_as = 'hdf5'

        # Field group -> name of the converter method applied to it.  The
        # order matters: a field listed in several groups keeps the
        # converter of the last group that mentions it.
        field_groups = (
            (semicolon_fields, '_split_on_semicolons'),
            (pipe_fields, '_split_on_semicolons_and_pipes'),
            (int_fields, '_int'),
            (float_fields, '_float'),
        )
        process_fns = {}
        for fields, converter_name in field_groups:
            if fields is None:
                continue
            converter = getattr(self, converter_name)
            process_fns.update(dict.fromkeys(fields, converter))

        # parse whichever mapping files were supplied
        if sample_md is not None:
            sample_md = MetadataMap.from_file(sample_md,
                                              process_fns=process_fns,
                                              header=sample_header)

        if obs_md is not None:
            obs_md = MetadataMap.from_file(obs_md,
                                           process_fns=process_fns,
                                           header=obs_header)

        if sample_md is None and obs_md is None:
            raise CommandError('Must specify sample_metadata and/or '
                               'observation_metadata.')

        # NOTE: the input table is modified IN PLACE and then returned.
        if sample_md:
            table.add_metadata(sample_md, axis='sample')

        if obs_md:
            table.add_metadata(obs_md, axis='observation')

        return {'table': (table, output_as)}
    def run(self, **kwargs):
        """Add metadata from mapping files to the table in ``kwargs``.

        Expected ``kwargs`` keys: ``table``, ``sample_metadata``,
        ``observation_metadata``, ``sc_separated``, ``sc_pipe_separated``,
        ``int_fields``, ``float_fields``, ``sample_header``,
        ``observation_header``, ``output_as_json``.

        The result is ``{'table': (table, output_as)}`` with ``output_as``
        equal to ``'json'`` or ``'hdf5'``.  ``CommandError`` is raised if
        both ``sample_metadata`` and ``observation_metadata`` are ``None``.
        """
        table = kwargs['table']
        sample_source = kwargs['sample_metadata']
        observation_source = kwargs['observation_metadata']
        sc_fields = kwargs['sc_separated']
        sc_pipe_fields = kwargs['sc_pipe_separated']
        integer_fields = kwargs['int_fields']
        floating_fields = kwargs['float_fields']
        sample_header = kwargs['sample_header']
        observation_header = kwargs['observation_header']
        output_as = 'hdf5'
        if kwargs['output_as_json']:
            output_as = 'json'

        # per-field metadata processing functions, if any were requested
        process_fns = {}
        if sc_fields is not None:
            splitter = self._split_on_semicolons
            for field in sc_fields:
                process_fns[field] = splitter

        if sc_pipe_fields is not None:
            pipe_splitter = self._split_on_semicolons_and_pipes
            for field in sc_pipe_fields:
                process_fns[field] = pipe_splitter

        if integer_fields is not None:
            to_int = self._int
            for field in integer_fields:
                process_fns[field] = to_int

        if floating_fields is not None:
            to_float = self._float
            for field in floating_fields:
                process_fns[field] = to_float

        # turn the supplied mapping files into MetadataMap objects
        sample_map = None
        if sample_source is not None:
            sample_map = MetadataMap.from_file(sample_source,
                                               process_fns=process_fns,
                                               header=sample_header)

        observation_map = None
        if observation_source is not None:
            observation_map = MetadataMap.from_file(
                observation_source,
                process_fns=process_fns,
                header=observation_header)

        if sample_map is None and observation_map is None:
            raise CommandError('Must specify sample_metadata and/or '
                               'observation_metadata.')

        # Beware: the caller's table object is mutated and handed back.
        if sample_map:
            table.add_metadata(sample_map, axis='sample')

        if observation_map:
            table.add_metadata(observation_map, axis='observation')

        return {'table': (table, output_as)}
Example #3
0
def _add_metadata(table,
                  sample_metadata=None,
                  observation_metadata=None,
                  sc_separated=None,
                  sc_pipe_separated=None,
                  int_fields=None,
                  float_fields=None,
                  sample_header=None,
                  observation_header=None):

    if sample_metadata is None and observation_metadata is None:
        raise ValueError('Must specify sample_metadata and/or '
                         'observation_metadata.')

    # define metadata processing functions, if any
    process_fns = {}
    if sc_separated is not None:
        process_fns.update(dict.fromkeys(sc_separated, _split_on_semicolons))

    if sc_pipe_separated is not None:
        process_fns.update(
            dict.fromkeys(sc_pipe_separated, _split_on_semicolons_and_pipes))

    if int_fields is not None:
        process_fns.update(dict.fromkeys(int_fields, _int))

    if float_fields is not None:
        process_fns.update(dict.fromkeys(float_fields, _float))

    # parse mapping files
    if sample_metadata is not None:
        sample_metadata = MetadataMap.from_file(sample_metadata,
                                                process_fns=process_fns,
                                                header=sample_header)

    if observation_metadata is not None:
        observation_metadata = MetadataMap.from_file(observation_metadata,
                                                     process_fns=process_fns,
                                                     header=observation_header)

    # NAUGHTY: this is modifying the input table IN PLACE!!! And then
    # RETURNING IT! MetadataAdder is angry!

    # add metadata as necessary
    if sample_metadata:
        table.add_metadata(sample_metadata, axis='sample')

    if observation_metadata:
        table.add_metadata(observation_metadata, axis='observation')

    return table
Example #4
0
def _add_metadata(table, sample_metadata=None, observation_metadata=None,
                  sc_separated=None, sc_pipe_separated=None, int_fields=None,
                  float_fields=None, sample_header=None,
                  observation_header=None):

    if sample_metadata is None and observation_metadata is None:
        raise ValueError('Must specify sample_metadata and/or '
                         'observation_metadata.')

    # define metadata processing functions, if any
    process_fns = {}
    if sc_separated is not None:
        process_fns.update(dict.fromkeys(sc_separated,
                                         _split_on_semicolons))

    if sc_pipe_separated is not None:
        process_fns.update(dict.fromkeys(sc_pipe_separated,
                           _split_on_semicolons_and_pipes))

    if int_fields is not None:
        process_fns.update(dict.fromkeys(int_fields, _int))

    if float_fields is not None:
        process_fns.update(dict.fromkeys(float_fields, _float))

    # parse mapping files
    if sample_metadata is not None:
        sample_metadata = MetadataMap.from_file(sample_metadata,
                                                process_fns=process_fns,
                                                header=sample_header)

    if observation_metadata is not None:
        observation_metadata = MetadataMap.from_file(
            observation_metadata,
            process_fns=process_fns,
            header=observation_header)

    # NAUGHTY: this is modifying the input table IN PLACE!!! And then
    # RETURNING IT! MetadataAdder is angry!

    # add metadata as necessary
    if sample_metadata:
        table.add_metadata(sample_metadata, axis='sample')

    if observation_metadata:
        table.add_metadata(observation_metadata, axis='observation')

    return table
Example #5
0
def create_biom_file(vcf_fps, output_fp, mapping_fp=None, zip=None):
    """Write the merged table and a filtered copy of it as BIOM JSON.

    ``merge_otu_tables(vcf_fps)`` supplies the master table and the
    observation ids to keep.  Two files are written inside the directory
    ``output_fp``: the master table and the table filtered down to those
    observation ids.  With ``zip == 'gz'`` both files are written through
    ``gzip.open`` (file names end in ``.biom.zip``); otherwise they are
    plain ``.biom`` files.

    If ``mapping_fp`` is given, it is parsed with ``MetadataMap.fromFile``
    and added to the master table as sample metadata before writing.

    Both output handles are closed even when writing fails.
    """
    master_table, master_observation_ids = merge_otu_tables(vcf_fps)
    if zip == 'gz':
        opener, mode, suffix = gzip.open, 'wb', '.biom.zip'
    else:
        opener, mode, suffix = open, 'w', '.biom'
    master_path = join(output_fp, 'master_table' + suffix)
    filtered_path = join(output_fp, 'filtered_table' + suffix)

    # keep only the observations listed in master_observation_ids
    def filter_function(values, id, md):
        return id in master_observation_ids

    # Context managers guarantee the handles are released on any error
    # (the original only closed them on the success path).
    with opener(master_path, mode) as output_master_f:
        with opener(filtered_path, mode) as output_filtered_f:
            if mapping_fp is not None:
                mapping_f = MetadataMap.fromFile(mapping_fp)
                master_table.addSampleMetadata(mapping_f)

            master_table.getBiomFormatJsonString(generatedby(),
                                                 direct_io=output_master_f)

            filtered_table = master_table.filterObservations(filter_function)
            filtered_table.getBiomFormatJsonString(
                generatedby(), direct_io=output_filtered_f)
Example #6
0
def load_metadata(lines):
    """Build a ``MetadataMap`` from sample/observation metadata lines.

    ``None`` is passed straight through: when ``lines`` is ``None`` the
    function returns ``None`` without parsing anything.
    """
    if lines is None:
        return None

    return MetadataMap.from_file(lines)
    def setUp(self):
        """Create the command object and fixtures used by the tests."""
        self.cmd = TableConverter()

        # BIOM input and the table parsed from it
        self.biom_lines1 = biom1
        self.biom_table1 = parse_biom_table(biom1)

        # classic (tab-separated) table, one list entry per line
        classic_lines = classic1.split('\n')
        self.classic_lines1 = classic_lines

        # sample metadata parsed from its lines
        sample_md_lines = sample_md1.split('\n')
        self.sample_md1 = MetadataMap.from_file(sample_md_lines)
    def setUp(self):
        """Prepare the fixtures shared by every test in this case."""
        converter = TableConverter()
        self.cmd = converter

        # the BIOM text is kept as-is and also parsed into a table
        self.biom_lines1 = biom1
        parsed_table = parse_biom_table(self.biom_lines1)
        self.biom_table1 = parsed_table

        newline = '\n'
        self.classic_lines1 = classic1.split(newline)

        self.sample_md1 = MetadataMap.from_file(sample_md1.split(newline))
Example #9
0
def load_metadata(fp):
    """Read a sample/observation metadata file into a ``MetadataMap``.

    Returns ``None`` without touching the file system when ``fp`` is
    ``None``; otherwise the file at ``fp`` is opened in ``'U'`` mode and
    handed to ``MetadataMap.fromFile``.
    """
    if fp is not None:
        with open(fp, 'U') as metadata_f:
            return MetadataMap.fromFile(metadata_f)

    return None
Example #10
0
def add_metadata_to_biom_table(biom_input_fp, taxonomy_map_fp, biom_output_fp):
    '''Load a BIOM table, add observation metadata, write it out as JSON.

    The metadata comes from ``taxonomy_map_fp``; a zero-byte map file
    results in an empty metadata dict being added instead.
    '''
    table = load_table(biom_input_fp)

    # an empty map file is not parsed at all
    map_is_empty = stat(taxonomy_map_fp).st_size == 0
    if map_is_empty:
        observation_md = {}
    else:
        column_names = ['Sample ID', 'taxonomy', 'c']
        observation_md = MetadataMap.from_file(taxonomy_map_fp,
                                               header=column_names)

    table.add_metadata(observation_md, 'observation')
    write_biom_table(table, 'json', biom_output_fp)
Example #11
0
def load_metadata(fp):
    """Parse a sample/observation metadata file, return a ``MetadataMap``.

    If ``fp`` is ``None``, this function will return ``None``.
    """
    if fp is None:
        return None

    # The 'U' mode flag was removed in Python 3.11 (open() raises
    # ValueError for it).  Text mode already translates newlines on
    # Python 3, so the default mode is used instead.
    with open(fp) as f:
        return MetadataMap.from_file(f)
Example #12
0
def convert(input_fp, output_fp, sample_metadata_fp, observation_metadata_fp,
            to_json, to_hdf5, to_tsv, collapsed_samples,
            collapsed_observations, header_key, output_metadata_id, table_type,
            process_obs_metadata, tsv_metadata_formatter):
    """Convert to/from the BIOM table format.

    Convert between BIOM table formats. See examples here:
    http://biom-format.org/documentation/biom_conversion.html

    Example usage:

    Convert a "classic" BIOM file (tab-separated text) to an HDF5 BIOM
    formatted OTU table:

    $ biom convert -i table.txt -o table.biom --to-hdf5
    """
    # NOTE(review): the docstring reads like CLI usage text and may be
    # surfaced to users, so its wording is left untouched.

    # at most one output format flag may be set
    if sum([to_tsv, to_hdf5, to_json]) > 1:
        raise ValueError("--to-tsv, --to-json, and --to-hdf5 are mutually "
                         "exclusive. You can only pass one of these options.")

    def _read_metadata(fp):
        """Return the MetadataMap parsed from ``fp``, or None if fp is None."""
        if fp is None:
            return None
        # 'U' mode was removed in Python 3.11 (ValueError); the default
        # text mode already handles newline translation on Python 3.
        with open(fp) as f:
            return MetadataMap.from_file(f)

    table = load_table(input_fp)
    sample_metadata_f = _read_metadata(sample_metadata_fp)
    observation_metadata_f = _read_metadata(observation_metadata_fp)

    # the actual conversion is delegated to _convert
    _convert(table, output_fp, sample_metadata_f, observation_metadata_f,
             to_json, to_hdf5, to_tsv, collapsed_samples,
             collapsed_observations, header_key, output_metadata_id,
             table_type, process_obs_metadata, tsv_metadata_formatter)
Example #13
0
def convert(input_fp, output_fp, sample_metadata_fp, observation_metadata_fp,
            to_json, to_hdf5, to_tsv, collapsed_samples,
            collapsed_observations, header_key, output_metadata_id, table_type,
            process_obs_metadata, tsv_metadata_formatter):
    """Convert to/from the BIOM table format.

    Convert between BIOM table formats. See examples here:
    http://biom-format.org/documentation/biom_conversion.html

    Example usage:

    Convert a "classic" BIOM file (tab-separated text) to an HDF5 BIOM
    formatted OTU table:

    $ biom convert -i table.txt -o table.biom --to-hdf5
    """
    # NOTE(review): the docstring above looks like user-facing usage text,
    # so it is deliberately kept verbatim.

    # reject conflicting output format flags before touching any file
    if sum([to_tsv, to_hdf5, to_json]) > 1:
        raise ValueError("--to-tsv, --to-json, and --to-hdf5 are mutually "
                         "exclusive. You can only pass one of these options.")

    table = load_table(input_fp)

    # Metadata files are opened with the default text mode: the former 'U'
    # flag makes open() raise ValueError on Python 3.11 and later.
    sample_metadata_f = None
    if sample_metadata_fp is not None:
        with open(sample_metadata_fp) as f:
            sample_metadata_f = MetadataMap.from_file(f)

    observation_metadata_f = None
    if observation_metadata_fp is not None:
        with open(observation_metadata_fp) as f:
            observation_metadata_f = MetadataMap.from_file(f)

    # hand everything over to _convert, which performs the conversion
    _convert(table, output_fp, sample_metadata_f, observation_metadata_f,
             to_json, to_hdf5, to_tsv, collapsed_samples,
             collapsed_observations, header_key, output_metadata_id,
             table_type, process_obs_metadata, tsv_metadata_formatter)
    def setUp(self):
        """Create the converter command, parsed inputs and data paths."""
        self.cmd = TableConverter()

        # BIOM text plus the table parsed from it
        self.biom_lines1 = biom1
        self.biom_table1 = parse_biom_table(biom1)

        # classic table and sample metadata, both split into lines
        self.classic_lines1 = classic1.split('\n')
        sample_md_lines = sample_md1.split('\n')
        self.sample_md1 = MetadataMap.from_file(sample_md_lines)

        # collapsed-table fixtures live in test_data/ next to this module
        here = dirname(abspath(__file__))
        test_data_dir = join(here, 'test_data')
        self.json_collapsed_obs = join(
            test_data_dir, 'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(
            test_data_dir, 'json_sample_collapsed.biom')
Example #15
0
def create_biom_file(vcf_fp, output_fp, mapping_fp=None, zip=None):
    """Convert one VCF file into a BIOM JSON table written to disk.

    ``vcf_fp`` must end in ``vcf`` (opened as text) or ``gz`` (opened with
    ``gzip.open``).  The table is built from the values returned by
    ``create_table_factory_objects``.  If ``mapping_fp`` is given, it is
    parsed with ``MetadataMap.fromFile`` and added as sample metadata.

    The table is written to ``output_fp``; with ``zip == 'gz'`` it is
    written through ``gzip.open`` to ``'<output_fp>.gz'`` instead.

    Raises ``ValueError`` for any other input file extension.
    """
    if vcf_fp.endswith('gz'):
        vcf_f = gzip.open(vcf_fp)
    elif vcf_fp.endswith('vcf'):
        # NOTE(review): 'U' is rejected by open() on Python 3.11+; kept
        # because the biom calls below belong to the Python 2 era API.
        vcf_f = open(vcf_fp, 'U')
    else:
        # call syntax works on Python 2 and 3; the message previously read
        # "areaccepted" because of a line continuation inside the literal
        raise ValueError("Invalid file format or extension, only '.vcf' or "
                         "'.vcf.gz' are accepted")

    # keep the input open until the table is built, then always close it
    try:
        data, observation_ids, sample_ids = \
            create_table_factory_objects(vcf_f)
        table = table_factory(data, sample_ids=sample_ids,
                              observation_ids=observation_ids,
                              constructor=SparseOTUTable)
    finally:
        vcf_f.close()

    if mapping_fp is not None:
        mapping_f = MetadataMap.fromFile(mapping_fp)
        # the original referenced an undefined name ``biom_table`` here
        table.addSampleMetadata(mapping_f)

    if zip == 'gz':
        output_f = gzip.open('%s.%s' % (output_fp, zip), 'wb')
    else:
        output_f = open(output_fp, 'w')
    try:
        table.getBiomFormatJsonString(generatedby(), direct_io=output_f)
    finally:
        output_f.close()
def create_biom_file(vcf_fps, output_fp, mapping_fp=None, zip=None):
    """Merge the given VCF files and write master/filtered BIOM tables.

    The master table and the observation ids to keep come from
    ``merge_otu_tables(vcf_fps)``.  Two JSON files are written into the
    directory ``output_fp``: ``master_table`` and ``filtered_table``, the
    latter holding only the observations whose id is among the ids to
    keep.  ``zip == 'gz'`` writes both through ``gzip.open``.

    When ``mapping_fp`` is not ``None`` it is parsed with
    ``MetadataMap.fromFile`` and attached as sample metadata first.

    The output handles are closed even if an error occurs while writing.
    """
    master_table, master_observation_ids = merge_otu_tables(vcf_fps)

    # create a function to filter table by
    def filter_function(values, id, md):
        return id in master_observation_ids

    if zip == 'gz':
        output_master_f = gzip.open(
            join(output_fp, 'master_table.biom.zip'), 'wb')
    else:
        output_master_f = open(join(output_fp, 'master_table.biom'), 'w')

    # try/finally so neither handle leaks when a later step raises
    try:
        if zip == 'gz':
            output_filtered_f = gzip.open(
                join(output_fp, 'filtered_table.biom.zip'), 'wb')
        else:
            output_filtered_f = open(
                join(output_fp, 'filtered_table.biom'), 'w')
        try:
            if mapping_fp is not None:
                mapping_f = MetadataMap.fromFile(mapping_fp)
                master_table.addSampleMetadata(mapping_f)

            master_table.getBiomFormatJsonString(
                generatedby(), direct_io=output_master_f)

            filtered_table = master_table.filterObservations(filter_function)
            filtered_table.getBiomFormatJsonString(
                generatedby(), direct_io=output_filtered_f)
        finally:
            output_filtered_f.close()
    finally:
        output_master_f.close()
Example #17
0
    def setUp(self):
        """Load the tables, metadata and paths used by the tests."""
        self.cmd = _convert
        self.output_filepath = tempfile.NamedTemporaryFile().name

        # load_table takes a path, so the BIOM text goes through a
        # temporary file that is flushed before it is read back
        with tempfile.NamedTemporaryFile('w') as biom_fh:
            biom_fh.write(biom1)
            biom_fh.flush()
            self.biom_table1 = load_table(biom_fh.name)

        self.biom_lines1 = biom1.split('\n')

        # same round trip for the classic (tab-separated) table
        with tempfile.NamedTemporaryFile('w') as classic_fh:
            classic_fh.write(classic1)
            classic_fh.flush()
            self.classic_biom1 = load_table(classic_fh.name)

        sample_md_lines = sample_md1.split('\n')
        self.sample_md1 = MetadataMap.from_file(sample_md_lines)

        # collapsed-table fixtures live in test_data/ next to this module
        test_data_dir = join(dirname(abspath(__file__)), 'test_data')
        self.json_collapsed_obs = join(
            test_data_dir, 'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(
            test_data_dir, 'json_sample_collapsed.biom')
    def setUp(self):
        """Prepare the conversion function and its test inputs."""
        self.cmd = _convert
        self.output_filepath = tempfile.NamedTemporaryFile().name

        # BIOM-formatted table, loaded from a flushed temporary file
        with tempfile.NamedTemporaryFile('w') as tmp:
            tmp.write(biom1)
            tmp.flush()
            biom_path = tmp.name
            self.biom_table1 = load_table(biom_path)

        newline = '\n'
        self.biom_lines1 = biom1.split(newline)

        # classic table, loaded the same way
        with tempfile.NamedTemporaryFile('w') as tmp:
            tmp.write(classic1)
            tmp.flush()
            classic_path = tmp.name
            self.classic_biom1 = load_table(classic_path)

        self.sample_md1 = MetadataMap.from_file(sample_md1.split(newline))

        # paths of the pre-built collapsed tables under test_data/
        module_dir = dirname(abspath(__file__))
        test_data_dir = join(module_dir, 'test_data')
        self.json_collapsed_obs = join(test_data_dir,
                                       'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(test_data_dir,
                                           'json_sample_collapsed.biom')
Example #19
0
 def checkfiles(self, filetype):
     """Check that the selected input files can be imported.

     ``filetype`` selects what is checked: ``'count'``, ``'biom'``,
     ``'tax'``, ``'meta'`` or ``'network'``.  Progress text is appended to
     ``self.checks`` and shown at the end through
     ``self.summ_box.SetValue``.  Failures are reported with
     ``wx.LogError`` and the module logger rather than raised.

     The ``'biom'``, ``'tax'`` and ``'meta'`` checks also publish pubsub
     messages (``receive_metadata``, ``receive_tax``, ``input_metadata``).
     """
     # define how files should be checked for, it is important that import functions work!
     # Strings are compared with ``==``: ``is`` tests object identity and
     # only appears to work when both strings happen to be interned.
     if filetype == 'count' and self.count_file:
         for x in self.count_file:
             try:
                 biomtab = biom.load_table(x)
                 self.checks += "Loaded count table from " + x + ". \n\n"
                 biomdims = biomtab.shape
                 self.checks += "This table contains " + str(biomdims[0]) + " taxa and " + str(biomdims[1]) +\
                                " samples. \n\n"
             except (TypeError, BiomParseException):
                 wx.LogError("Cannot parse biom file '%s'." % x)
                 logger.error("Cannot parse biom file. \n", exc_info=True)
     if filetype == 'biom' and self.biom_file:
         for x in self.biom_file:
             try:
                 biomtab = biom.load_table(x)
                 self.checks += "Loaded BIOM file from " + x + ". \n\n"
                 biomdims = biomtab.shape
                 self.checks += "This BIOM file contains " + str(biomdims[0]) + " taxa and " + str(biomdims[1]) +\
                                " samples. \n\n"
                 # metadata of the first sample stands in for all samples
                 names = biomtab.metadata(biomtab.ids(axis='sample')[0],
                                          axis="sample")
                 if names is not None:
                     varlist = list(names)
                     names = '\n'.join(varlist)
                     self.checks += "The sample data contains the following variables: \n" + names + "\n"
                     pub.sendMessage('receive_metadata', msg=varlist)
                 # likewise, only the first observation is inspected
                 names = biomtab.metadata(
                     biomtab.ids(axis='observation')[0], axis='observation')
                 if names is not None:
                     self.checks += "This BIOM file contains taxonomy data. \n\n"
                     pub.sendMessage('receive_tax', msg='added_tax')
             except (TypeError, BiomParseException):
                 wx.LogError(
                     str(x) +
                     ' does not appear to be a BIOM-compatible table!')
                 logger.error(
                     str(x) +
                     ' does not appear to be a BIOM-compatible table!. ',
                     exc_info=True)
     if filetype == 'tax' and self.tax_file:
         for x, z in zip(self.count_file, self.tax_file):
             try:
                 biomtab = biom.load_table(x)
                 # ``with`` closes the file even if parsing fails
                 with open(z, 'r') as obs_f:
                     obs_data = MetadataMap.from_file(obs_f)
                 # for taxonomy collapsing,
                 # metadata variable needs to be a complete list
                 # not separate entries for each tax level
                 for obs_id in list(obs_data):
                     fields = obs_data[obs_id]
                     tax = [fields.pop(level) for level in list(fields)]
                     fields['taxonomy'] = tax
                 biomtab.add_metadata(obs_data, axis='observation')
                 self.checks += "Loaded taxonomy table from " + z + ". \n\n"
                 pub.sendMessage('receive_tax', msg='added_tax')
             except (TypeError, ValueError, BiomParseException):
                 wx.LogError(
                     str(x) + ' and ' + str(z) +
                     ' cannot be combined into a BIOM file!')
                 logger.error(str(x) + ' and ' + str(z) +
                              ' cannot be combined into a BIOM file! ',
                              exc_info=True)
     if filetype == 'meta' and self.sample_file:
         meta_dict = dict()
         for x, z in zip(self.count_file, self.sample_file):
             try:
                 biomtab = biom.load_table(x)
                 with open(z, 'r') as sample_f:
                     sample_data = MetadataMap.from_file(sample_f)
                 biomtab.add_metadata(sample_data, axis='sample')
                 self.checks += "Loaded sample data from " + z + ". \n\n"
                 data = biomtab.metadata_to_dataframe(axis='sample')
                 allnames = data.columns
                 num_cols = data._get_numeric_data()
                 # columns that are not in the numeric subset
                 varlist = list(set(allnames) - set(num_cols))
                 names = '\n'.join(allnames)
                 self.checks += "The sample data contains the following variables: \n" + names + "\n"
                 meta_dict[x] = varlist
             except (TypeError, KeyError, ValueError, BiomParseException):
                 wx.LogError(
                     str(x) + ' and ' + str(z) +
                     ' cannot be combined into a BIOM file!')
                 logger.error(str(x) + ' and ' + str(z) +
                              ' cannot be combined into a BIOM file! ',
                              exc_info=True)
         pub.sendMessage('input_metadata', msg=(meta_dict, self.split))
     if filetype == 'network':
         try:
             nets_object = get_input(self.settings)
             nets_object.add_networks()
             self.checks += "Network objects could be added successfully."
         except (TypeError, ValueError):
             wx.LogError('Unable to load network edge list!')
             logger.error('Unable to load network edge list! ')
     # show everything collected so far in the summary box
     self.summ_box.SetValue(self.checks)
Example #20
0
def get_input(inputs, publish=False):
    """
    Takes all input and returns a dictionary of biom files.
    If tab-delimited files are supplied, these are combined
    into a biom file. File names are used as keys.
    This is mostly a utility wrapper, as all biom-related functions
    are from biom-format.org.

    At the moment, rarefaction is performed after sample splitting.
    This means that samples with uneven sequence counts will not
    be rarefied to equal depths.

    All files are written to BIOM files, while a settings file is also written to disk
    for use by other massoc commands.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None; this function has no explicit return statement.
    :raises ValueError: if the number of taxonomy, sample data or OTU
        metadata tables differs from the number of OTU tables, or if no
        BIOM file, OTU table or network is supplied.
    """
    # handler to file
    # construct logger after filepath is provided
    _create_logger(inputs['fp'])
    if inputs['biom_file'] is not None:
        logger.info('BIOM file(s) to process: ' + ", ".join(inputs['biom_file']))
    if inputs['otu_table'] is not None:
        logger.info('Tab-delimited OTU table(s) to process: ' + ", ".join(inputs['otu_table']))
    # Lengths are compared by value: ``is not`` on ints only happens to
    # work for the small integers CPython caches.
    if inputs['tax_table'] is not None:
        if len(inputs['otu_table']) != len(inputs['tax_table']):
            logger.error("Add a taxonomy table for every OTU table!", exc_info=True)
            raise ValueError("Add a taxonomy table for every OTU table!")
    if inputs['sample_data'] is not None:
        if len(inputs['otu_table']) != len(inputs['sample_data']):
            logger.error("Add a sample data table for every OTU table!", exc_info=True)
            raise ValueError("Add a sample data table for every OTU table!")
    if inputs['otu_meta'] is not None:
        if len(inputs['otu_table']) != len(inputs['otu_meta']):
            logger.error("Add a metadata table for every OTU table!", exc_info=True)
            raise ValueError("Add a metadata table for every OTU table!")
    filestore = {}
    if (inputs['biom_file'] is None and inputs['otu_table'] is None
            and inputs['network'] is None):
        logger.error("Please supply either a biom file"
                     ", a tab-delimited OTU table or a network!", exc_info=True)
        raise ValueError("Please supply either a biom file"
                         ", a tab-delimited OTU table or a network!")
    # Only process count files if present
    i = 0
    if inputs['name'] is None:
        inputs['name'] = list()
        inputs['name'].append('file_')
    if inputs['biom_file'] is not None:
        try:
            for x in inputs['biom_file']:
                biomtab = load_table(x)
                filestore[inputs['name'][i]] = biomtab
                i += 1
        except Exception:
            # best effort: import problems are logged, not raised
            logger.error("Failed to import BIOM files.", exc_info=True)
    if inputs['otu_table'] is not None:
        try:
            j = 0  # j is used to match sample + tax data to OTU data
            for input_fp in inputs['otu_table']:
                sample_metadata_fp = None
                observation_metadata_fp = None
                biomtab = load_table(input_fp)
                # The two optional tables are looked up independently.  The
                # original shared one try block (with ``except TypeError or
                # KeyError``, which only catches TypeError), so a missing
                # sample table silently dropped the taxonomy table too.
                if inputs['sample_data'] is not None:
                    sample_metadata_fp = inputs['sample_data'][j]
                if inputs['tax_table'] is not None:
                    observation_metadata_fp = inputs['tax_table'][j]
                if sample_metadata_fp is not None:
                    with open(sample_metadata_fp, 'r') as sample_f:
                        sample_data = MetadataMap.from_file(sample_f)
                    biomtab.add_metadata(sample_data, axis='sample')
                if observation_metadata_fp is not None:
                    with open(observation_metadata_fp, 'r') as obs_f:
                        obs_data = MetadataMap.from_file(obs_f)
                    # for taxonomy collapsing,
                    # metadata variable needs to be a complete list
                    # not separate entries for each tax level
                    for obs_id in list(obs_data):
                        fields = obs_data[obs_id]
                        tax = [fields.pop(level) for level in list(fields)]
                        fields['taxonomy'] = tax
                    biomtab.add_metadata(obs_data, axis='observation')
                filestore[inputs['name'][j]] = biomtab
                j += 1
        except Exception:
            # best effort: combination problems are logged, not raised
            logger.warning("Failed to combine input files.", exc_info=True)
    bioms = Batch({'otu': filestore}, inputs)
    # it is possible that there are forbidden characters in the OTU identifiers
    # we can forbid people from using those, or replace those with an underscore
    if inputs['biom_file'] or inputs['otu_table']:
        for name in bioms.otu:
            biomfile = bioms.otu[name]
            taxon_ids = biomfile._observation_ids  # need to be careful with these operations
            taxon_index = biomfile._obs_index      # likely to corrupt BIOM file if done wrong
            new_ids = deepcopy(taxon_ids)
            new_indexes = deepcopy(taxon_index)
            # replace spaces in every identifier, keeping ids and index in sync
            for pos, old_id in enumerate(taxon_ids):
                new_id = old_id.replace(" ", "_")
                new_ids[pos] = new_id
                new_indexes[new_id] = new_indexes.pop(old_id)
            biomfile._observation_ids = new_ids
            biomfile._obs_index = new_indexes
            bioms.otu[name] = biomfile
        logger.info('Collapsing taxonomy... ')
        bioms.collapse_tax()
        if inputs['cluster'] is not None:
            if publish:
                pub.sendMessage('update', msg='Clustering BIOM files...')
            logger.info('Clustering BIOM files... ')
            bioms.cluster_biom()
        # value comparison; ``is not 'TRUE'`` tested object identity
        if inputs['split'] is not None and inputs['split'] != 'TRUE':
            bioms.split_biom()
        if inputs['min'] is not None:
            if publish:
                pub.sendMessage('update', msg='Setting minimum mean abundance...')
            logger.info('Removing taxa below minimum count... ')
            bioms.prev_filter(mode='min')
        if inputs['prev'] is not None:
            if publish:
                pub.sendMessage('update', msg='Setting prevalence filter...')
            logger.info('Setting prevalence filter... ')
            bioms.prev_filter(mode='prev')
        if inputs['rar'] is not None:
            if publish:
                pub.sendMessage('update', msg='Rarefying counts...')
            logger.info('Rarefying counts... ')
            bioms.rarefy()
    # record, per level and name, the path of each processed BIOM file
    bioms.inputs['procbioms'] = dict()
    if inputs['biom_file'] or inputs['otu_table']:
        if 'otu' not in bioms.inputs['levels']: # add otu level always
            bioms.inputs['procbioms']['otu'] = dict()
            for name in bioms.inputs['name']:
                biomname = bioms.inputs['fp'] + '/' + name + '_' + 'otu' + '.hdf5'
                bioms.inputs['procbioms']['otu'][name] = biomname
        for level in bioms.inputs['levels']:
            bioms.inputs['procbioms'][level] = dict()
            for name in bioms.inputs['name']:
                biomname = bioms.inputs['fp'] + '/' + name + '_' + level + '.hdf5'
                bioms.inputs['procbioms'][level][name] = biomname
        all_bioms = {**bioms.otu, **bioms.genus, **bioms.family, **bioms.order,
                     **bioms.class_, **bioms.phylum}
        for biomfile in all_bioms:
            if all_bioms[biomfile].shape[0] == 1:
                logger.error("The current preprocessing steps resulted in BIOM files with only 1 row.", exc_info=True)
    if inputs['network'] is not None:
        if publish:
            pub.sendMessage('update', msg='Checking previously generated networks...')
        logger.info('Checking previously generated networks...')
        filelist = deepcopy(inputs['network'])
        for network_fp in filelist:
            network = _read_network(network_fp)
            nodes = len(network.nodes)
            edges = len(network.edges)
            logger.info("This network has " + str(nodes) +
                        " nodes and " + str(edges) + " edges.")
            weight = nx.get_edge_attributes(network, 'weight')
            if len(weight) > 0:
                logger.info('This is a weighted network.')
            else:
                logger.info('This is an unweighted network.')
    try:
        if inputs['biom_file'] or inputs['otu_table']:
            bioms.write_bioms()
            logger.info('BIOM files written to disk.  ')
    except Exception:
        logger.warning('Failed to write BIOM files to disk.  ', exc_info=True)
    write_settings(bioms.inputs)
    logger.info('Settings file written to disk.  ')