def collapse_coupled(self):
        '''Build a new OtuTable in which coupled samples are merged.

        Every entry of this table has the pattern r'.[12]$' stripped from its
        sample name (so e.g. 'sample.1' and 'sample.2' both become 'sample').
        Be aware that the sample_name of the entries in this table is
        overwritten in place while doing so.

        Entries that end up with the same sample name and sequence are
        replaced by one entry whose count and coverage are the sums over the
        group, and whose marker and taxonomy come from the first entry seen.
        Entries that are alone in their group are added unchanged.

        NOTE(review): the '.' in the pattern is unescaped, so any character
        followed by a trailing 1 or 2 is removed - confirm this is intended.

        '''
        suffix_pattern = re.compile(r'.[12]$')
        grouped = OrderedDict()
        for entry in self:
            stripped_name = suffix_pattern.sub('', entry.sample_name)
            entry.sample_name = stripped_name
            by_sequence = grouped.setdefault(stripped_name, OrderedDict())
            by_sequence.setdefault(entry.sequence, []).append(entry)

        collapsed = OtuTable()
        for sample_name, by_sequence in grouped.items():
            for sequence, members in by_sequence.items():
                if len(members) != 1:
                    merged = OtuTableEntry()
                    merged.marker = members[0].marker
                    merged.sample_name = sample_name
                    merged.sequence = sequence
                    merged.count = sum(member.count for member in members)
                    merged.coverage = sum(member.coverage for member in members)
                    # TODO: Make this more of a 'median' taxonomy.
                    merged.taxonomy = members[0].taxonomy
                    members = [merged]
                collapsed.add(members)
        return collapsed
Exemple #2
0
    def _appraise_inexactly(self, metagenome_otu_table_collection,
                            found_otu_collection,
                            sequence_identity):
        '''Appraise metagenome OTUs against 'found' OTUs with inexact matching.

        The metagenome collection is searched against found_otu_collection
        (OTUs 'found' either by binning or assembly) using
        SequenceSearcher().global_search at the given sequence_identity.
        Every query OTU gets an AppraisalBuildingBlock for its sample; query
        OTUs whose search result has a target add their count to num_found
        and are appended to found_otus of that building block.

        Returns a dict of sample name to AppraisalBuildingBlock.

        '''
        # NOTE(review): this wrapped collection is built but never handed to
        # the search below, which receives found_otu_collection directly -
        # confirm which of the two was intended.
        wrapped_table = OtuTable()
        wrapped_table.add(found_otu_collection)
        wrapped_collection = OtuTableCollection()
        wrapped_collection.otu_table_objects = [wrapped_table]

        blocks_by_sample = {}
        hits = SequenceSearcher().global_search(
            metagenome_otu_table_collection,
            found_otu_collection,
            sequence_identity)
        for hit in hits:
            query = hit.query
            try:
                block = blocks_by_sample[query.sample_name]
            except KeyError:
                block = AppraisalBuildingBlock()
                blocks_by_sample[query.sample_name] = block

            if hit.target is None:
                # Nothing matched this query, so it only gets a (possibly
                # empty) building block.
                continue
            block.num_found += query.count
            block.found_otus.append(query)

        return blocks_by_sample
Exemple #3
0
    def rarefy(self, otu_table_collection, num_to_sample, random_generator=random):
        '''Return an OtuTable rarefied so that only num_to_sample sequences
        are present in each sample. Samples not containing sufficient
        sequences are ignored with a warning.

        This is not a true rarefaction technique because sequences not
        chosen in the rarefaction can still influence the output table
        through the LCA or arbitrary choice operation that has been
        carried out on the input table.

        Also, the rarefier operates on counts rather than predicted
        coverage, skewing the results toward OTUs that lack
        inserts. But not by a lot, presumably.

        Rarefaction is carried out separately for each sample/gene
        combination. Each returned entry is a shallow copy of the input
        entry with its count replaced by the number of times its sequence
        was drawn; sequences that were not drawn are left out.

        otu_table_collection: OtuTableCollection
            OTU tables iterable
        num_to_sample: int
            number of sequences to sample from each
        random_generator:
            object whose sample(population, k) method does the drawing,
            the random module by default

        Raises an Exception when the same sequence is encountered twice for
        one sample/gene combination.
        '''
        # Index the input as sample -> gene -> sequence -> OTU, refusing
        # duplicates since their counts could not be told apart afterwards.
        sample_to_gene_to_otu = {}
        for otu in otu_table_collection:
            gene_to_otu = sample_to_gene_to_otu.setdefault(otu.sample_name, {})
            sequence_to_otu = gene_to_otu.setdefault(otu.marker, {})
            if otu.sequence in sequence_to_otu:
                raise Exception(
                    "Found duplicate sequence in OTU table in sample %s, gene %s" %
                    (otu.sample_name, otu.marker))
            sequence_to_otu[otu.sequence] = otu

        to_return = OtuTable()
        for sample_name, gene_to_otu in sample_to_gene_to_otu.items():
            for gene, sequence_to_otu in gene_to_otu.items():
                # One list element per counted sequence, so that drawing
                # from the list is weighted by count.
                sequences_to_sample = []
                for sequence, otu in sequence_to_otu.items():
                    sequences_to_sample.extend([sequence] * otu.count)

                if len(sequences_to_sample) < num_to_sample:
                    logging.warning("Sample %s gene %s only contains %i sequences, so cannot be rarefied. Ignoring this sample/gene combination" % (sample_name, gene, len(sequences_to_sample)))
                    continue

                sequence_counts = {}
                for seq in random_generator.sample(sequences_to_sample, num_to_sample):
                    sequence_counts[seq] = sequence_counts.get(seq, 0) + 1

                for seq, count in sequence_counts.items():
                    # Copy so that the input entry keeps its original count.
                    e = copy.copy(sequence_to_otu[seq])
                    e.count = count
                    to_return.add([e])
        return to_return
                        
                        
                        
    def collapse_coupled(self):
        '''Build a new OtuTable in which coupled samples are merged.

        Every entry of this table has the pattern r'.[12]$' stripped from its
        sample name (so e.g. 'sample.1' and 'sample.2' both become 'sample').
        Be aware that the sample_name of the entries in this table is
        overwritten in place while doing so.

        Entries that end up with the same sample name and sequence are
        replaced by one entry whose count and coverage are the sums over the
        group, and whose marker and taxonomy come from the first entry seen.
        Entries that are alone in their group are added unchanged.

        NOTE(review): the '.' in the pattern is unescaped, so any character
        followed by a trailing 1 or 2 is removed - confirm this is intended.

        '''
        suffix_pattern = re.compile(r'.[12]$')
        grouped = OrderedDict()
        for entry in self:
            stripped_name = suffix_pattern.sub('', entry.sample_name)
            entry.sample_name = stripped_name
            by_sequence = grouped.setdefault(stripped_name, OrderedDict())
            by_sequence.setdefault(entry.sequence, []).append(entry)

        collapsed = OtuTable()
        for sample_name, by_sequence in grouped.items():
            for sequence, members in by_sequence.items():
                if len(members) != 1:
                    merged = OtuTableEntry()
                    merged.marker = members[0].marker
                    merged.sample_name = sample_name
                    merged.sequence = sequence
                    merged.count = sum(member.count for member in members)
                    merged.coverage = sum(member.coverage for member in members)
                    # TODO: Make this more of a 'median' taxonomy.
                    merged.taxonomy = members[0].taxonomy
                    members = [merged]
                collapsed.add(members)
        return collapsed
Exemple #5
0
    def _appraise_inexactly(self, metagenome_otu_table_collection,
                            found_otu_collection, sequence_identity):
        '''Appraise metagenome OTUs against 'found' OTUs with inexact matching.

        The metagenome collection is searched against found_otu_collection
        (OTUs 'found' either by binning or assembly) using
        SequenceSearcher().global_search at the given sequence_identity.
        Every query OTU gets an AppraisalBuildingBlock for its sample; query
        OTUs whose search result has a target add their count to num_found
        and are appended to found_otus of that building block.

        Returns a dict of sample name to AppraisalBuildingBlock.

        '''
        # NOTE(review): this wrapped collection is built but never handed to
        # the search below, which receives found_otu_collection directly -
        # confirm which of the two was intended.
        wrapped_table = OtuTable()
        wrapped_table.add(found_otu_collection)
        wrapped_collection = OtuTableCollection()
        wrapped_collection.otu_table_objects = [wrapped_table]

        blocks_by_sample = {}
        hits = SequenceSearcher().global_search(
            metagenome_otu_table_collection, found_otu_collection,
            sequence_identity)
        for hit in hits:
            query = hit.query
            try:
                block = blocks_by_sample[query.sample_name]
            except KeyError:
                block = AppraisalBuildingBlock()
                blocks_by_sample[query.sample_name] = block

            if hit.target is None:
                # Nothing matched this query, so it only gets a (possibly
                # empty) building block.
                continue
            block.num_found += query.count
            block.found_otus.append(query)

        return blocks_by_sample
Exemple #6
0
    def rarefy(self, otu_table_collection, num_to_sample, random_generator=random):
        '''Return an OtuTable rarefied so that only num_to_sample sequences
        are present in each sample. Samples not containing sufficient
        sequences are ignored with a warning.

        This is not a true rarefaction technique because sequences not
        chosen in the rarefaction can still influence the output table
        through the LCA or arbitrary choice operation that has been
        carried out on the input table.

        Also, the rarefier operates on counts rather than predicted
        coverage, skewing the results toward OTUs that lack
        inserts. But not by a lot, presumably.

        Rarefaction is carried out separately for each sample/gene
        combination. Each returned entry is a shallow copy of the input
        entry with its count replaced by the number of times its sequence
        was drawn; sequences that were not drawn are left out.

        otu_table_collection: OtuTableCollection
            OTU tables iterable
        num_to_sample: int
            number of sequences to sample from each
        random_generator:
            object whose sample(population, k) method does the drawing,
            the random module by default

        Raises an Exception when the same sequence is encountered twice for
        one sample/gene combination.
        '''
        # Index the input as sample -> gene -> sequence -> OTU, refusing
        # duplicates since their counts could not be told apart afterwards.
        sample_to_gene_to_otu = {}
        for otu in otu_table_collection:
            gene_to_otu = sample_to_gene_to_otu.setdefault(otu.sample_name, {})
            sequence_to_otu = gene_to_otu.setdefault(otu.marker, {})
            if otu.sequence in sequence_to_otu:
                raise Exception(
                    "Found duplicate sequence in OTU table in sample %s, gene %s" %
                    (otu.sample_name, otu.marker))
            sequence_to_otu[otu.sequence] = otu

        to_return = OtuTable()
        for sample_name, gene_to_otu in sample_to_gene_to_otu.items():
            for gene, sequence_to_otu in gene_to_otu.items():
                # One list element per counted sequence, so that drawing
                # from the list is weighted by count.
                sequences_to_sample = []
                for sequence, otu in sequence_to_otu.items():
                    sequences_to_sample.extend([sequence] * otu.count)

                if len(sequences_to_sample) < num_to_sample:
                    logging.warning("Sample %s gene %s only contains %i sequences, so cannot be rarefied. Ignoring this sample/gene combination" % (sample_name, gene, len(sequences_to_sample)))
                    continue

                sequence_counts = {}
                for seq in random_generator.sample(sequences_to_sample, num_to_sample):
                    sequence_counts[seq] = sequence_counts.get(seq, 0) + 1

                for seq, count in sequence_counts.items():
                    # Copy so that the input entry keeps its original count.
                    e = copy.copy(sequence_to_otu[seq])
                    e.count = count
                    to_return.add([e])
        return to_return
Exemple #7
0
 def print_appraisal(self, appraisal,
                     output_io=sys.stdout,
                     accounted_for_otu_table_io=None,
                     unaccounted_for_otu_table_io=None):
     '''Write a tab-separated overview of an Appraisal to output_io.

     A header line is written first, then one row per entry in
     appraisal.appraisal_results, then a 'total' row (sums over the
     samples) and an 'average' row (means over the samples, where the
     percentage is the mean of the per-sample percentages). A sample with
     no sequences at all is reported as 0.0 percent found, both in its
     own row and in the average.

     appraisal:
         object whose appraisal_results are iterated
     output_io:
         file-like object that the overview is written to
     accounted_for_otu_table_io:
         when given, an OtuTable of the found OTUs is written to it
     unaccounted_for_otu_table_io:
         when given, an OtuTable of the not-found OTUs is written to it
     '''

     output_io.write("\t".join(['sample','num_found','num_not_found','percent_found'])+"\n")
     founds = []
     not_founds = []

     def fraction_found(num_found, num_not_found):
         # Samples without any sequences count as nothing found, rather
         # than dividing by zero.
         total = num_found + num_not_found
         if total == 0:
             return 0.0
         return float(num_found)/total

     def print_sample(num_found, num_not_found, sample, mypercent=None):
         # An explicitly supplied percentage wins, even when it is 0.0; the
         # 'average' row relies on this because its counts arrive as
         # pre-formatted strings that cannot be divided.
         if mypercent is not None:
             percent = mypercent
         else:
             percent = fraction_found(num_found, num_not_found) * 100
         output_io.write("\t".join([sample, str(num_found), str(num_not_found), "%2.1f" % percent])+"\n")

     def mean(l):
         return float(sum(l))/len(l) if len(l) > 0 else float('nan')

     # The tables only exist when the corresponding output was asked for.
     accounted_for_table = OtuTable() if accounted_for_otu_table_io else None
     unaccounted_for_table = OtuTable() if unaccounted_for_otu_table_io else None

     for appraisal_result in appraisal.appraisal_results:
         print_sample(appraisal_result.num_found,
                      appraisal_result.num_not_found,
                      appraisal_result.metagenome_sample_name)
         founds.append(appraisal_result.num_found)
         not_founds.append(appraisal_result.num_not_found)
         if accounted_for_table is not None:
             accounted_for_table.add(appraisal_result.found_otus)
         if unaccounted_for_table is not None:
             unaccounted_for_table.add(appraisal_result.not_found_otus)

     print_sample(sum(founds), sum(not_founds), 'total')

     means = [fraction_found(num_found, num_not_found)
              for num_found, num_not_found in zip(founds, not_founds)]
     print_sample("%2.1f" % mean(founds), "%2.1f" % mean(not_founds), 'average',
                  mypercent=mean(means)*100)

     if accounted_for_table is not None:
         accounted_for_table.write_to(accounted_for_otu_table_io)
     if unaccounted_for_table is not None:
         unaccounted_for_table.write_to(unaccounted_for_otu_table_io)
Exemple #8
0
    def print_samples(self, **kwargs):
        '''Write OTU table entries stored in a database to output_io.

        Keyword arguments (all four must be given):
            db: handed to SequenceDatabase.acquire
            sample_names: when truthy, entries whose sample_name is one of
                these names are selected
            taxonomy: used when sample_names is falsy; entries are selected
                with a 'like' query for '%<taxonomy>%' on their taxonomy
            output_io: the selected entries are written here as an OtuTable

        An Exception is raised when any other keyword argument is given, and
        a 'Programming error' Exception when a batch is processed while both
        sample_names and taxonomy are falsy.
        '''
        db = SequenceDatabase.acquire(kwargs.pop('db'))
        sample_names = kwargs.pop('sample_names')
        taxonomy = kwargs.pop('taxonomy')
        output_io = kwargs.pop('output_io')
        if kwargs:
            raise Exception("Unexpected arguments detected: %s" % kwargs)

        dbm = self._connect_to_sqlite(db)

        # Cannot query sqlite with > 999 '?' entries, so query in batches.
        max_set_size = 999
        query_chunks = set(sample_names) if sample_names else [taxonomy]

        collected = OtuTable()
        total_printed = 0
        for batch in SequenceDatabase.grouper(query_chunks, max_set_size):
            if sample_names:
                otus_query = dbm.table('otus')
                # NOTE(review): presumably the grouper pads the last batch
                # with None - confirm; those placeholders are dropped here.
                wanted = [name for name in batch if name is not None]
                rows = otus_query.where_in('sample_name', wanted).get()
            elif taxonomy:
                otus_query = dbm.table('otus')
                like_pattern = "%%%s%%" % taxonomy
                rows = otus_query.where('taxonomy', 'like', like_pattern).get()
            else:
                raise Exception("Programming error")

            for row in rows:
                converted = OtuTableEntry()
                converted.marker = row.marker
                converted.sample_name = row.sample_name
                converted.sequence = row.sequence
                converted.count = row.num_hits
                converted.coverage = row.coverage
                converted.taxonomy = row.taxonomy
                collected.add([converted])
                total_printed += 1

        collected.write_to(output_io)
        summary = "Printed %i OTU table entries" % total_printed
        logging.info(summary)
Exemple #9
0
    def print_samples(self, **kwargs):
        '''Write OTU table entries stored in a database to output_io.

        Keyword arguments (all four must be given):
            db: handed to SequenceDatabase.acquire
            sample_names: when truthy, entries whose sample_name is one of
                these names are selected
            taxonomy: used when sample_names is falsy; entries are selected
                with a 'like' query for '%<taxonomy>%' on their taxonomy
            output_io: the selected entries are written here as an OtuTable

        An Exception is raised when any other keyword argument is given, and
        a 'Programming error' Exception when a batch is processed while both
        sample_names and taxonomy are falsy.
        '''
        db = SequenceDatabase.acquire(kwargs.pop('db'))
        sample_names = kwargs.pop('sample_names')
        taxonomy = kwargs.pop('taxonomy')
        output_io = kwargs.pop('output_io')
        if kwargs:
            raise Exception("Unexpected arguments detected: %s" % kwargs)

        dbm = self._connect_to_sqlite(db)

        # Cannot query sqlite with > 999 '?' entries, so query in batches.
        max_set_size = 999
        query_chunks = set(sample_names) if sample_names else [taxonomy]

        collected = OtuTable()
        total_printed = 0
        for batch in SequenceDatabase.grouper(query_chunks, max_set_size):
            if sample_names:
                otus_query = dbm.table('otus')
                # NOTE(review): presumably the grouper pads the last batch
                # with None - confirm; those placeholders are dropped here.
                wanted = [name for name in batch if name is not None]
                rows = otus_query.where_in('sample_name', wanted).get()
            elif taxonomy:
                otus_query = dbm.table('otus')
                like_pattern = "%%%s%%" % taxonomy
                rows = otus_query.where('taxonomy', 'like', like_pattern).get()
            else:
                raise Exception("Programming error")

            for row in rows:
                converted = OtuTableEntry()
                converted.marker = row.marker
                converted.sample_name = row.sample_name
                converted.sequence = row.sequence
                converted.count = row.num_hits
                converted.coverage = row.coverage
                converted.taxonomy = row.taxonomy
                collected.add([converted])
                total_printed += 1

        collected.write_to(output_io)
        summary = "Printed %i OTU table entries" % total_printed
        logging.info(summary)
Exemple #10
0
    def print_appraisal(self,
                        appraisal,
                        doing_binning,
                        output_io=sys.stdout,
                        doing_assembly=False,
                        binned_otu_table_io=None,
                        unbinned_otu_table_io=None,
                        assembled_otu_table_io=None,
                        unaccounted_for_otu_table_io=None):
        '''Write a tab-separated overview of an Appraisal to output_io.

        A header line is written first, then one row per entry in
        appraisal.appraisal_results, then a 'total' row (sums over the
        samples) and an 'average' row (means over the samples, where the
        percentages are the mean of the per-sample percentages). The binned
        columns are only present when doing_binning is true and the
        assembled columns only when doing_assembly is true.

        Percentages are relative to the number of not-found sequences, plus
        the binned ones (when binning) and the assembled-but-not-binned ones
        (when assembling). A sample for which that total is zero is reported
        as 0.0 percent, both in its own row and in the average.

        Parameters
        ----------
        appraisal:
            object whose appraisal_results are iterated
        doing_binning, doing_assembly:
            which groups of columns to report
        output_io:
            file-like object that the overview is written to
        binned_otu_table_io, unbinned_otu_table_io, assembled_otu_table_io,
        unaccounted_for_otu_table_io:
            when given, an OtuTable of the binned, assembled-but-not-binned,
            assembled and not-found OTUs respectively is written to each
        '''

        headers = ['sample']
        if doing_binning: headers.append('num_binned')
        if doing_assembly: headers.append('num_assembled')
        headers.append('num_not_found')
        if doing_binning: headers.append('percent_binned')
        if doing_assembly: headers.append('percent_assembled')
        output_io.write("\t".join(headers) + "\n")

        binned = []
        assembled = []
        assembled_not_binned = []
        not_founds = []

        def fraction(part, total):
            # A sample without any sequences counts as nothing recovered,
            # rather than dividing by zero.
            if total == 0:
                return 0.0
            return float(part) / total

        def print_sample(num_binned,
                         num_assembled,
                         num_assembled_not_binned,
                         num_not_found,
                         sample,
                         mypercent_binned=None,
                         mypercent_assembled=None):
            percent_binned = mypercent_binned
            percent_assembled = mypercent_assembled
            if mypercent_binned is None and mypercent_assembled is None:
                # No percentages were supplied, so derive them from the
                # counts that are being reported.
                total = num_not_found
                if doing_binning: total += num_binned
                if doing_assembly: total += num_assembled_not_binned
                if doing_binning:
                    percent_binned = fraction(num_binned, total) * 100
                if doing_assembly:
                    percent_assembled = fraction(num_assembled, total) * 100
            to_write = [sample]
            if doing_binning: to_write.append(str(num_binned))
            if doing_assembly: to_write.append(str(num_assembled))
            to_write.append(str(num_not_found))
            if doing_binning:
                to_write.append("%2.1f" % percent_binned)
            if doing_assembly:
                to_write.append("%2.1f" % percent_assembled)
            output_io.write("\t".join(to_write) + "\n")

        def mean(l):
            return float(sum(l)) / len(l) if len(l) > 0 else float('nan')

        # The tables only exist when the corresponding output was asked for.
        binned_table = OtuTable() if binned_otu_table_io else None
        unbinned_table = OtuTable() if unbinned_otu_table_io else None
        assembled_table = OtuTable() if assembled_otu_table_io else None
        unaccounted_for_table = (
            OtuTable() if unaccounted_for_otu_table_io else None)

        for appraisal_result in appraisal.appraisal_results:
            num_assembled_not_binned = None
            if doing_assembly:
                num_assembled_not_binned = \
                    appraisal_result.num_assembled_not_binned()
            print_sample(
                appraisal_result.num_binned if doing_binning else None,
                appraisal_result.num_assembled if doing_assembly else None,
                num_assembled_not_binned,
                appraisal_result.num_not_found,
                appraisal_result.metagenome_sample_name)
            if doing_binning:
                binned.append(appraisal_result.num_binned)
            if doing_assembly:
                assembled.append(appraisal_result.num_assembled)
                assembled_not_binned.append(num_assembled_not_binned)
            not_founds.append(appraisal_result.num_not_found)
            if binned_table is not None:
                binned_table.add(appraisal_result.binned_otus)
            if unbinned_table is not None:
                unbinned_table.add(
                    appraisal_result.assembled_not_binned_otus())
            if assembled_table is not None:
                assembled_table.add(appraisal_result.assembled_otus)
            if unaccounted_for_table is not None:
                unaccounted_for_table.add(appraisal_result.not_found_otus)

        print_sample(
            sum(binned) if doing_binning else None,
            sum(assembled) if doing_assembly else None,
            sum(assembled_not_binned) if doing_assembly else None,
            sum(not_founds), 'total')

        # Per-sample fractions, averaged below for the 'average' row.
        binned_means = []
        assembled_means = []
        for i, num_not_found in enumerate(not_founds):
            total = (assembled_not_binned[i] if doing_assembly else 0) \
                + num_not_found
            if doing_binning:
                total += binned[i]
                binned_means.append(fraction(binned[i], total))
            if doing_assembly:
                assembled_means.append(fraction(assembled[i], total))
        print_sample("%2.1f" % mean(binned) if doing_binning else None,
                     "%2.1f" % mean(assembled) if doing_assembly else None,
                     None,
                     "%2.1f" % mean(not_founds),
                     'average',
                     mypercent_binned=(mean(binned_means) * 100
                                       if doing_binning else None),
                     mypercent_assembled=(mean(assembled_means) * 100
                                          if doing_assembly else None))

        if binned_table is not None:
            binned_table.write_to(binned_otu_table_io)
        if unbinned_table is not None:
            unbinned_table.write_to(unbinned_otu_table_io)
        if assembled_table is not None:
            assembled_table.write_to(assembled_otu_table_io)
        if unaccounted_for_table is not None:
            unaccounted_for_table.write_to(unaccounted_for_otu_table_io)
Exemple #11
0
    def appraise(self, **kwargs):
        '''Given a collection of OTU tables derived from samples, and OTU
        table(s) corresponding to a collection of recovered genomes, how
        much of the community has been recovered in those genomes?

        Genome OTUs are first filtered with
        excluded_duplicate_distinct_genes(). Each metagenome OTU is then
        counted as found or not found for its sample: by exact sequence
        match when sequence_identity is None, otherwise according to
        whether SequenceSearcher().global_search reports a target for it.
        In the latter case an OTU (sample name plus sequence) reported more
        than once is only counted the first time, with a warning.

        Parameters
        ----------
        kwargs:
            genome_otu_table_collection: OTUs of the recovered genomes (required)
            metagenome_otu_table_collection: OTUs of the samples (required)
            sequence_identity: float for 'near enough', None when an exact match is required.

        Returns
        -------
        An Appraisal object containing appraisals for each metagenome

        Raises
        ------
        Exception when unexpected keyword arguments are given
        '''
        genome_otu_table_collection = kwargs.pop('genome_otu_table_collection')
        metagenome_otu_table_collection = kwargs.pop('metagenome_otu_table_collection')
        sequence_identity = kwargs.pop('sequence_identity', None)
        if len(kwargs) > 0:
            raise Exception("Unexpected arguments detected: %s" % kwargs)

        logging.info("Read in %i markers from the different genomes" %\
                     len(genome_otu_table_collection))
        filtered_genome_otus = \
            list(genome_otu_table_collection.excluded_duplicate_distinct_genes())
        logging.info("After excluding duplicate markers that may indicate "
                     "contamination, found %i markers" % len(filtered_genome_otus))

        sample_name_to_appraisal = {}

        def record(otu, found):
            # Book the OTU under its sample, creating the sample's
            # AppraisalResult the first time the sample is seen.
            try:
                appraisal = sample_name_to_appraisal[otu.sample_name]
            except KeyError:
                appraisal = AppraisalResult()
                appraisal.metagenome_sample_name = otu.sample_name
                sample_name_to_appraisal[otu.sample_name] = appraisal
            if found:
                appraisal.num_found += otu.count
                appraisal.found_otus.append(otu)
            else:
                appraisal.num_not_found += otu.count
                appraisal.not_found_otus.append(otu)

        if sequence_identity is None:
            genome_otu_sequences = set()
            genome_names = set()
            for otu in filtered_genome_otus:
                genome_otu_sequences.add(otu.sequence)
                genome_names.add(otu.sample_name)
            logging.info("Read in %i unique sequences from the %i reference genomes" %\
                         (len(genome_otu_sequences), len(genome_names)))

            # read in metagenome OTU sequences
            for otu in metagenome_otu_table_collection:
                record(otu, otu.sequence in genome_otu_sequences)
        else:
            seen_otus = set()
            genome_otu_table = OtuTable()
            genome_otu_table.add(filtered_genome_otus)
            filtered_collection = OtuTableCollection()
            filtered_collection.otu_table_objects = [genome_otu_table]
            for uc in SequenceSearcher().global_search(metagenome_otu_table_collection,
                                             filtered_collection,
                                             sequence_identity):
                q = uc.query
                key = (q.sample_name, q.sequence)
                if key in seen_otus:
                    logging.warning("Double-saw an OTU..")
                    continue
                seen_otus.add(key)
                record(q, uc.target is not None)

        app = Appraisal()
        # A list rather than a live dict view, so that the results can be
        # indexed and iterated repeatedly independent of the dict.
        app.appraisal_results = list(sample_name_to_appraisal.values())
        return app
Exemple #12
0
    def print_appraisal(self, appraisal,
                        doing_binning,
                        output_io=sys.stdout,
                        doing_assembly=False,
                        binned_otu_table_io=None,
                        unbinned_otu_table_io=None,
                        assembled_otu_table_io=None,
                        unaccounted_for_otu_table_io=None):
        '''Write a tab-separated overview of an Appraisal to output_io.

        A header line is written first, then one row per entry in
        appraisal.appraisal_results, then a 'total' row (sums over the
        samples) and an 'average' row (means over the samples, where the
        percentages are the mean of the per-sample percentages). The binned
        columns are only present when doing_binning is true and the
        assembled columns only when doing_assembly is true.

        Percentages are relative to the number of not-found sequences, plus
        the binned ones (when binning) and the assembled-but-not-binned ones
        (when assembling). A sample for which that total is zero is reported
        as 0.0 percent, both in its own row and in the average.

        Parameters
        ----------
        appraisal:
            object whose appraisal_results are iterated
        doing_binning, doing_assembly:
            which groups of columns to report
        output_io:
            file-like object that the overview is written to
        binned_otu_table_io, unbinned_otu_table_io, assembled_otu_table_io,
        unaccounted_for_otu_table_io:
            when given, an OtuTable of the binned, assembled-but-not-binned,
            assembled and not-found OTUs respectively is written to each
        '''

        headers = ['sample']
        if doing_binning: headers.append('num_binned')
        if doing_assembly: headers.append('num_assembled')
        headers.append('num_not_found')
        if doing_binning: headers.append('percent_binned')
        if doing_assembly: headers.append('percent_assembled')
        output_io.write("\t".join(headers)+"\n")

        binned = []
        assembled = []
        assembled_not_binned = []
        not_founds = []

        def fraction(part, total):
            # A sample without any sequences counts as nothing recovered,
            # rather than dividing by zero.
            if total == 0:
                return 0.0
            return float(part)/total

        def print_sample(num_binned, num_assembled, num_assembled_not_binned, num_not_found, sample,
                         mypercent_binned=None, mypercent_assembled=None):
            percent_binned = mypercent_binned
            percent_assembled = mypercent_assembled
            if mypercent_binned is None and mypercent_assembled is None:
                # No percentages were supplied, so derive them from the
                # counts that are being reported.
                total = num_not_found
                if doing_binning: total += num_binned
                if doing_assembly: total += num_assembled_not_binned
                if doing_binning:
                    percent_binned = fraction(num_binned, total) * 100
                if doing_assembly:
                    percent_assembled = fraction(num_assembled, total) * 100
            to_write = [sample]
            if doing_binning: to_write.append(str(num_binned))
            if doing_assembly: to_write.append(str(num_assembled))
            to_write.append(str(num_not_found))
            if doing_binning:
                to_write.append("%2.1f" % percent_binned)
            if doing_assembly:
                to_write.append("%2.1f" % percent_assembled)
            output_io.write("\t".join(to_write)+"\n")

        def mean(l):
            return float(sum(l))/len(l) if len(l) > 0 else float('nan')

        # The tables only exist when the corresponding output was asked for.
        binned_table = OtuTable() if binned_otu_table_io else None
        unbinned_table = OtuTable() if unbinned_otu_table_io else None
        assembled_table = OtuTable() if assembled_otu_table_io else None
        unaccounted_for_table = OtuTable() if unaccounted_for_otu_table_io else None

        for appraisal_result in appraisal.appraisal_results:
            num_assembled_not_binned = None
            if doing_assembly:
                num_assembled_not_binned = appraisal_result.num_assembled_not_binned()
            print_sample(appraisal_result.num_binned if doing_binning else None,
                         appraisal_result.num_assembled if doing_assembly else None,
                         num_assembled_not_binned,
                         appraisal_result.num_not_found,
                         appraisal_result.metagenome_sample_name)
            if doing_binning:
                binned.append(appraisal_result.num_binned)
            if doing_assembly:
                assembled.append(appraisal_result.num_assembled)
                assembled_not_binned.append(num_assembled_not_binned)
            not_founds.append(appraisal_result.num_not_found)
            if binned_table is not None:
                binned_table.add(appraisal_result.binned_otus)
            if unbinned_table is not None:
                unbinned_table.add(appraisal_result.assembled_not_binned_otus())
            if assembled_table is not None:
                assembled_table.add(appraisal_result.assembled_otus)
            if unaccounted_for_table is not None:
                unaccounted_for_table.add(appraisal_result.not_found_otus)

        print_sample(sum(binned) if doing_binning else None,
                     sum(assembled) if doing_assembly else None,
                     sum(assembled_not_binned) if doing_assembly else None,
                     sum(not_founds),
                     'total')

        # Per-sample fractions, averaged below for the 'average' row.
        binned_means = []
        assembled_means = []
        for i, num_not_found in enumerate(not_founds):
            total = (assembled_not_binned[i] if doing_assembly else 0)+num_not_found
            if doing_binning:
                total += binned[i]
                binned_means.append(fraction(binned[i], total))
            if doing_assembly:
                assembled_means.append(fraction(assembled[i], total))
        print_sample("%2.1f" % mean(binned) if doing_binning else None,
                     "%2.1f" % mean(assembled) if doing_assembly else None,
                     None,
                     "%2.1f" % mean(not_founds),
                     'average',
                     mypercent_binned=mean(binned_means)*100 if doing_binning else None,
                     mypercent_assembled=(mean(assembled_means)*100 if doing_assembly else None))

        if binned_table is not None:
            binned_table.write_to(binned_otu_table_io)
        if unbinned_table is not None:
            unbinned_table.write_to(unbinned_otu_table_io)
        if assembled_table is not None:
            assembled_table.write_to(assembled_otu_table_io)
        if unaccounted_for_table is not None:
            unaccounted_for_table.write_to(unaccounted_for_otu_table_io)