Example #1
0
    def test_safe_md5(self):
        """Make sure we have the expected md5 with varied input types

        The same content (``foo`` followed by a newline) is hashed once from
        an open file handle and once from a list of lines; both must yield
        the expected digest. An unsupported input type must raise TypeError.

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        exp = 'd3b07384d113edec49eaa6238ad5ff00'

        # Context managers release both handles (and remove the temporary
        # file) even if hashing raises, instead of leaking them until GC.
        with NamedTemporaryFile(
                mode='w',
                prefix='test_safe_md5',
                suffix='txt') as tmp_f:
            tmp_f.write('foo\n')
            # Flush so the content is on disk before reopening by name.
            tmp_f.flush()

            # NOTE(review): the 'U' mode flag is kept unchanged; it was
            # removed in Python 3.11 -- confirm the target interpreter.
            with open(tmp_f.name, 'U') as in_f:
                obs = safe_md5(in_f)
        self.assertEqual(obs, exp)

        obs = safe_md5(['foo\n'])
        self.assertEqual(obs, exp)

        # unsupported type raises TypeError
        self.assertRaises(TypeError, safe_md5, 42)
Example #2
0
    def test_safe_md5(self):
        """Make sure we have the expected md5 with varied input types

        Hashes the same content (``foo`` plus a trailing newline) from an
        open file handle and from a list of lines, and checks that an
        unsupported input type raises TypeError.

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        exp = 'd3b07384d113edec49eaa6238ad5ff00'

        # Both file objects are managed by ``with`` so they are closed (and
        # the temporary file deleted) deterministically rather than leaked.
        with NamedTemporaryFile(
                mode='w',
                prefix='test_safe_md5',
                suffix='txt') as tmp_f:
            tmp_f.write('foo\n')
            # The data must reach the disk before the file is reopened.
            tmp_f.flush()

            # NOTE(review): 'U' mode is preserved from the original; it no
            # longer exists in Python 3.11+ -- confirm the supported versions.
            with open(tmp_f.name, 'U') as reopened:
                obs = safe_md5(reopened)
        self.assertEqual(obs, exp)

        obs = safe_md5(['foo\n'])
        self.assertEqual(obs, exp)

        # unsupported type raises TypeError
        self.assertRaises(TypeError, safe_md5, 42)
Example #3
0
    def test_safe_md5(self):
        """Make sure we have the expected md5 with varied input types

        The digest is computed from an open file handle and from a list of
        lines holding the same content; an unsupported input type must raise
        TypeError.

        Modified from PyCogent (www.pycogent.org).
        """
        exp = 'd3b07384d113edec49eaa6238ad5ff00'

        # ``with`` guarantees both handles are closed (and the temporary file
        # removed) even when hashing fails; the original leaked them.
        with NamedTemporaryFile(mode='w', prefix='test_safe_md5',
                                suffix='txt') as tmp_f:
            tmp_f.write('foo\n')
            # Flush so reopening the file by name sees the written content.
            tmp_f.flush()

            # NOTE(review): 'U' mode is kept as-is; it was removed in
            # Python 3.11 -- confirm the target interpreter.
            with open(tmp_f.name, 'U') as in_f:
                obs = safe_md5(in_f)
        self.assertEqual(obs, exp)

        obs = safe_md5(['foo\n'])
        self.assertEqual(obs, exp)

        # unsupported type raises TypeError
        self.assertRaises(TypeError, safe_md5, 42)
Example #4
0
    def test_safe_md5(self):
        """Make sure we have the expected md5

        Writes ``foo`` plus a newline to a temporary file, reopens it by name
        and checks the digest computed from the open handle.

        Modified from PyCogent (www.pycogent.org).
        """
        exp = "d3b07384d113edec49eaa6238ad5ff00"

        # Manage both handles with ``with`` so they are closed (and the
        # temporary file deleted) instead of being leaked until GC.
        with NamedTemporaryFile(mode="w", prefix="test_safe_md5",
                                suffix="txt") as tmp_f:
            tmp_f.write("foo\n")
            # Flush so the reopened handle sees the content.
            tmp_f.flush()

            # NOTE(review): "U" mode is kept unchanged; it was removed in
            # Python 3.11 -- confirm the target interpreter.
            with open(tmp_f.name, "U") as in_f:
                obs = safe_md5(in_f)
        self.assertEqual(obs, exp)
Example #5
0
    def run(self, **kwargs):
        """Summarize a BIOM table as a list of human-readable text lines.

        Expected keyword arguments:

         table: two-element tuple holding the biom table to summarize and
                the file(-like) object with the original table data. The
                file(-like) object may be None, in which case no MD5 sum is
                computed
         qualitative: report the number of unique observation ids per
                      sample instead of the total observation count per
                      sample
         suppress_md5: if ``True``, skip computing the MD5 sum of the table
                       file contents. Not consulted when ``table[1] is None``

        Returns a dict mapping ``'biom-summary'`` to the list of lines.
        """
        is_qualitative = kwargs['qualitative']
        biom_table, raw_table = kwargs['table']

        stats = compute_counts_per_sample_stats(biom_table, is_qualitative)
        lowest, highest, median, mean, per_sample = stats
        n_observations = len(biom_table.ObservationIds)

        # Short-circuit: 'suppress_md5' is only looked up when raw table
        # data is actually available.
        skip_md5 = raw_table is None or kwargs['suppress_md5']

        per_sample_values = per_sample.values()

        # Metadata categories are taken from the first metadata entry.
        sample_categories = (["None provided"]
                             if biom_table.SampleMetadata is None
                             else biom_table.SampleMetadata[0].keys())
        observation_categories = (
            ["None provided"]
            if biom_table.ObservationMetadata is None
            else biom_table.ObservationMetadata[0].keys())

        # --- header ---
        summary = [
            'Num samples: %d' % len(per_sample),
            'Num observations: %d' % n_observations,
        ]
        if not is_qualitative:
            summary.append('Total count: %d' % sum(per_sample_values))
            summary.append(
                'Table density (fraction of non-zero values): %1.3f' %
                biom_table.getTableDensity())
        if not skip_md5:
            summary.append('Table md5 (unzipped): %s' % safe_md5(raw_table))
        summary.append('')

        # --- per-sample statistics ---
        summary.append('Observations/sample summary:' if is_qualitative
                       else 'Counts/sample summary:')
        summary.extend([
            ' Min: %r' % lowest,
            ' Max: %r' % highest,
            ' Median: %1.3f' % median,
            ' Mean: %1.3f' % mean,
            ' Std. dev.: %1.3f' % std(per_sample_values),
            ' Sample Metadata Categories: %s' % '; '.join(sample_categories),
            ' Observation Metadata Categories: %s' %
            '; '.join(observation_categories),
            '',
        ])

        # --- per-sample detail, ordered by (count, sample id) ---
        summary.append('Observations/sample detail:' if is_qualitative
                       else 'Counts/sample detail:')
        by_count = sorted((count, sample_id)
                          for sample_id, count in per_sample.items())
        summary.extend(' %s: %r' % (sample_id, count)
                       for count, sample_id in by_count)

        return {'biom-summary': summary}
Example #6
0
    def run(self, **kwargs):
        """Build a plain-text summary of a BIOM table.

        Keyword arguments read from ``kwargs``:

         table: two-element tuple ``(table, table_lines)``; ``table`` is the
                table object to summarize and ``table_lines`` is the data
                handed to ``safe_md5``. ``table_lines`` may be None, in which
                case no MD5 line is emitted
         qualitative: forwarded to ``compute_counts_per_sample_stats``; when
                      true the section headings read "Observations/sample"
                      and the total count and table density lines are
                      omitted
         suppress_md5: when true, the MD5 line is omitted. Only consulted if
                       ``table_lines`` is not None

        Returns a dict mapping ``'biom_summary'`` to the list of summary
        lines (header, per-sample statistics, then one line per sample
        ordered by ascending count).
        """
        result = {}
        qualitative = kwargs['qualitative']
        table, table_lines = kwargs['table']

        min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
            compute_counts_per_sample_stats(table, qualitative)
        num_observations = len(table.observation_ids)

        suppress_md5 = (table_lines is None) or kwargs['suppress_md5']

        # Materialise the values: on Python 3 ``dict.values()`` is a view,
        # which array-based reducers cannot consume as a sequence.
        # NOTE(review): ``std`` is assumed to be numpy's -- confirm against
        # the file's imports.
        counts_per_sample_values = list(counts_per_sample.values())

        # Metadata categories are read from the first metadata entry.
        if table.sample_metadata is None:
            sample_md_keys = ["None provided"]
        else:
            sample_md_keys = table.sample_metadata[0].keys()

        if table.observation_metadata is None:
            observation_md_keys = ["None provided"]
        else:
            observation_md_keys = table.observation_metadata[0].keys()

        lines = []

        num_samples = len(table.sample_ids)
        lines.append('Num samples: %d' % num_samples)
        lines.append('Num observations: %d' % num_observations)

        if not qualitative:
            total_count = sum(counts_per_sample_values)
            lines.append('Total count: %d' % total_count)
            lines.append('Table density (fraction of non-zero values): %1.3f' %
                         table.get_table_density())

        if not suppress_md5:
            lines.append('Table md5 (unzipped): %s' % safe_md5(table_lines))
        lines.append('')

        if qualitative:
            lines.append('Observations/sample summary:')
        else:
            lines.append('Counts/sample summary:')

        lines.append(' Min: %r' % min_counts)
        lines.append(' Max: %r' % max_counts)
        lines.append(' Median: %1.3f' % median_counts)
        lines.append(' Mean: %1.3f' % mean_counts)
        lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))
        lines.append(
            ' Sample Metadata Categories: %s' %
            '; '.join(sample_md_keys))
        lines.append(
            ' Observation Metadata Categories: %s' %
            '; '.join(observation_md_keys))
        lines.append('')

        if qualitative:
            lines.append('Observations/sample detail:')
        else:
            lines.append('Counts/sample detail:')

        # Stable sort on the count only: samples with equal counts keep the
        # iteration order of ``counts_per_sample``.
        for k, v in sorted(counts_per_sample.items(), key=itemgetter(1)):
            lines.append(' %s: %r' % (k, v))

        result['biom_summary'] = lines
        return result
def main():
    """Summarize a BIOM table from the command line.

    Parses options with the module-level ``parser``; the options read are
    ``input_fp`` (required), ``output_fp``, ``num_observations`` and
    ``suppress_md5``. The summary is written to ``output_fp`` when it is
    given, otherwise printed to standard output.

    If ``input_fp`` is missing, the help text is printed and
    ``parser.error`` is invoked.
    """
    opts, _ = parser.parse_args()

    if opts.input_fp is None:
        parser.print_help()
        parser.error('Must specify an input file!')

    input_fp = opts.input_fp
    output_fp = opts.output_fp
    # NOTE(review): the handles returned by biom_open (here and for the md5
    # below) are never closed; whether they support close()/``with`` is not
    # visible from this block -- confirm and close them explicitly.
    table = parse_biom_table(biom_open(input_fp, 'U'))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
        compute_counts_per_sample_stats(table, opts.num_observations)
    num_observations = len(table.ObservationIds)
    suppress_md5 = opts.suppress_md5

    counts_per_sample_values = counts_per_sample.values()

    # Indexing a None metadata attribute raises TypeError, which is how a
    # table without metadata is detected here.
    try:
        sample_md_keys = table.SampleMetadata[0].keys()
    except TypeError:
        sample_md_keys = ["None provided"]
    try:
        observation_md_keys = table.ObservationMetadata[0].keys()
    except TypeError:
        observation_md_keys = ["None provided"]

    lines = []

    num_samples = len(counts_per_sample)
    lines.append('Num samples: %s' % str(num_samples))
    lines.append('Num observations: %s' % str(num_observations))
    if not opts.num_observations:
        total_count = sum(counts_per_sample_values)
        lines.append('Total count: %s' % str(total_count))
        lines.append('Table density (fraction of non-zero values): %1.4f' %
                     table.getTableDensity())
    if not suppress_md5:
        lines.append('Table md5 (unzipped): %s' %
                     safe_md5(biom_open(input_fp, 'U')))
    lines.append('')

    if opts.num_observations:
        lines.append('Observations/sample summary:')
    else:
        lines.append('Counts/sample summary:')
    lines.append(' Min: %s' % str(min_counts))
    lines.append(' Max: %s' % str(max_counts))
    lines.append(' Median: %s' % str(median_counts))
    lines.append(' Mean: %s' % str(mean_counts))
    lines.append(' Std. dev.: %s' % (str(std(counts_per_sample_values))))
    lines.append(' Sample Metadata Categories: %s' % '; '.join(sample_md_keys))
    lines.append(' Observation Metadata Categories: %s' %
                 '; '.join(observation_md_keys))

    lines.append('')
    if opts.num_observations:
        lines.append('Observations/sample detail:')
    else:
        lines.append('Counts/sample detail:')
    # Order samples by (count, sample id).
    for v, k in sorted((v, k) for k, v in counts_per_sample.items()):
        lines.append(' %s: %s' % (k, str(v)))

    summary = '\n'.join(lines)
    if output_fp is not None:
        # ``with`` closes the output file so the summary is flushed to disk
        # deterministically; the original left the handle open.
        with open(output_fp, 'w') as out_f:
            out_f.write(summary)
    else:
        # Parenthesised single-argument form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(summary)