def test_safe_md5(self):
    """Make sure we have the expected md5 with varied input types

    This method is ported from PyCogent (http://www.pycogent.org). PyCogent
    is a GPL project, but we obtained permission from the authors of this
    method to port it to the BIOM Format project (and keep it under BIOM's
    BSD license).
    """
    exp = 'd3b07384d113edec49eaa6238ad5ff00'

    tmp_f = NamedTemporaryFile(
        mode='w', prefix='test_safe_md5', suffix='txt')
    tmp_f.write('foo\n')
    tmp_f.flush()

    obs = safe_md5(open(tmp_f.name, 'U'))
    self.assertEqual(obs, exp)

    obs = safe_md5(['foo\n'])
    self.assertEqual(obs, exp)

    # unsupported type raises TypeError
    self.assertRaises(TypeError, safe_md5, 42)
def test_safe_md5(self):
    """Make sure we have the expected md5 with varied input types

    Modified from PyCogent (www.pycogent.org).
    """
    exp = 'd3b07384d113edec49eaa6238ad5ff00'

    tmp_f = NamedTemporaryFile(mode='w', prefix='test_safe_md5',
                               suffix='txt')
    tmp_f.write('foo\n')
    tmp_f.flush()

    obs = safe_md5(open(tmp_f.name, 'U'))
    self.assertEqual(obs, exp)

    obs = safe_md5(['foo\n'])
    self.assertEqual(obs, exp)

    # unsupported type raises TypeError
    self.assertRaises(TypeError, safe_md5, 42)
def test_safe_md5(self):
    """Make sure we have the expected md5

    Modified from PyCogent (www.pycogent.org).
    """
    exp = "d3b07384d113edec49eaa6238ad5ff00"

    tmp_f = NamedTemporaryFile(mode="w", prefix="test_safe_md5",
                               suffix="txt")
    tmp_f.write("foo\n")
    tmp_f.flush()

    obs = safe_md5(open(tmp_f.name, "U"))
    self.assertEqual(obs, exp)
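
# The tests above exercise safe_md5 with both an open file handle and a plain
# list of lines, and expect a TypeError for unsupported input such as an int.
# The following is a minimal sketch of a compatible implementation, shown only
# for illustration; it is an assumption, not the BIOM Format project's actual
# safe_md5 code.
from hashlib import md5


def safe_md5_sketch(lines):
    """Return the md5 hexdigest of an iterable of text lines or a file handle.

    Non-iterable input (e.g. the integer 42 used in the tests) raises
    TypeError naturally when iteration is attempted.
    """
    digest = md5()
    for line in lines:
        # Encode text to bytes before hashing so file handles and in-memory
        # lists of str produce the same digest.
        digest.update(line.encode('utf-8'))
    return digest.hexdigest()


# Example: the md5 of 'foo\n' matches the expected value used in the tests.
# safe_md5_sketch(['foo\n']) == 'd3b07384d113edec49eaa6238ad5ff00'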
def run(self, **kwargs):
    """
    table: two-element tuple containing the biom table to summarize and
        the file(-like) object containing the original table data. The
        second element of the tuple (the file(-like) object) may be None.
        If this is the case, the MD5 sum will *not* be computed
    qualitative: counts are presented as number of unique observation ids
        per sample, rather than total observation count per sample
    suppress_md5: if ``True``, the MD5 sum of the table file contents will
        not be computed. This parameter is ignored if ``table[1] is None``
    """
    result = {}
    qualitative = kwargs['qualitative']
    table, table_lines = kwargs['table']

    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
        compute_counts_per_sample_stats(table, qualitative)
    num_observations = len(table.ObservationIds)

    suppress_md5 = (table_lines is None) or kwargs['suppress_md5']

    counts_per_sample_values = counts_per_sample.values()

    if table.SampleMetadata is None:
        sample_md_keys = ["None provided"]
    else:
        sample_md_keys = table.SampleMetadata[0].keys()

    if table.ObservationMetadata is None:
        observation_md_keys = ["None provided"]
    else:
        observation_md_keys = table.ObservationMetadata[0].keys()

    lines = []

    num_samples = len(counts_per_sample)
    lines.append('Num samples: %d' % num_samples)
    lines.append('Num observations: %d' % num_observations)

    if not qualitative:
        total_count = sum(counts_per_sample_values)
        lines.append('Total count: %d' % total_count)
        lines.append('Table density (fraction of non-zero values): %1.3f' %
                     table.getTableDensity())

    if not suppress_md5:
        lines.append('Table md5 (unzipped): %s' % safe_md5(table_lines))
    lines.append('')

    if qualitative:
        lines.append('Observations/sample summary:')
    else:
        lines.append('Counts/sample summary:')
    lines.append(' Min: %r' % min_counts)
    lines.append(' Max: %r' % max_counts)
    lines.append(' Median: %1.3f' % median_counts)
    lines.append(' Mean: %1.3f' % mean_counts)
    lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))
    lines.append(' Sample Metadata Categories: %s' %
                 '; '.join(sample_md_keys))
    lines.append(' Observation Metadata Categories: %s' %
                 '; '.join(observation_md_keys))
    lines.append('')

    if qualitative:
        lines.append('Observations/sample detail:')
    else:
        lines.append('Counts/sample detail:')

    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    for v, k in sorted_counts_per_sample:
        lines.append(' %s: %r' % (k, v))

    result['biom-summary'] = lines
    return result
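
# The qualitative flag documented above changes what a "count" means: the
# total of all observation counts in a sample versus the number of distinct
# observation ids present in it. The sketch below illustrates that
# distinction; it is NOT the real compute_counts_per_sample_stats from biom,
# and it assumes the per-sample data is already available as a
# {sample_id: numpy array} mapping rather than a Table object.
import numpy as np


def counts_per_sample_stats_sketch(sample_data, qualitative=False):
    """Return (min, max, median, mean, {sample_id: count}).

    sample_data: dict mapping sample id -> 1-D numpy array of observation
    counts for that sample (hypothetical input format).
    """
    counts_per_sample = {}
    for sample_id, values in sample_data.items():
        if qualitative:
            # number of distinct observation ids present in the sample
            counts_per_sample[sample_id] = (values > 0).sum()
        else:
            # total observation count in the sample
            counts_per_sample[sample_id] = values.sum()
    counts = list(counts_per_sample.values())
    return (min(counts), max(counts), np.median(counts), np.mean(counts),
            counts_per_sample)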
def run(self, **kwargs):
    result = {}
    qualitative = kwargs['qualitative']
    table, table_lines = kwargs['table']

    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
        compute_counts_per_sample_stats(table, qualitative)
    num_observations = len(table.observation_ids)

    suppress_md5 = (table_lines is None) or kwargs['suppress_md5']

    counts_per_sample_values = counts_per_sample.values()

    if table.sample_metadata is None:
        sample_md_keys = ["None provided"]
    else:
        sample_md_keys = table.sample_metadata[0].keys()

    if table.observation_metadata is None:
        observation_md_keys = ["None provided"]
    else:
        observation_md_keys = table.observation_metadata[0].keys()

    lines = []

    num_samples = len(table.sample_ids)
    lines.append('Num samples: %d' % num_samples)
    lines.append('Num observations: %d' % num_observations)

    if not qualitative:
        total_count = sum(counts_per_sample_values)
        lines.append('Total count: %d' % total_count)
        lines.append('Table density (fraction of non-zero values): %1.3f' %
                     table.get_table_density())

    if not suppress_md5:
        lines.append('Table md5 (unzipped): %s' % safe_md5(table_lines))
    lines.append('')

    if qualitative:
        lines.append('Observations/sample summary:')
    else:
        lines.append('Counts/sample summary:')
    lines.append(' Min: %r' % min_counts)
    lines.append(' Max: %r' % max_counts)
    lines.append(' Median: %1.3f' % median_counts)
    lines.append(' Mean: %1.3f' % mean_counts)
    lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))
    lines.append(
        ' Sample Metadata Categories: %s' % '; '.join(sample_md_keys))
    lines.append(
        ' Observation Metadata Categories: %s' %
        '; '.join(observation_md_keys))
    lines.append('')

    if qualitative:
        lines.append('Observations/sample detail:')
    else:
        lines.append('Counts/sample detail:')

    for k, v in sorted(counts_per_sample.items(), key=itemgetter(1)):
        lines.append(' %s: %r' % (k, v))

    result['biom_summary'] = lines
    return result
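
# A minimal sketch of how the run method above might be driven. Here
# `summarizer` is assumed to be an instance of the command class that defines
# run, and 'table.biom' is a hypothetical input path; parse_biom_table is the
# same parser used by main() below. The open file handle is passed alongside
# the parsed table so the MD5 of the raw contents can be reported.
table_f = open('table.biom', 'U')          # hypothetical input file
table = parse_biom_table(table_f)
table_f.seek(0)                            # rewind so safe_md5 sees all lines

result = summarizer.run(table=(table, table_f),
                        qualitative=False,
                        suppress_md5=False)
print('\n'.join(result['biom_summary']))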
def main():
    opts, args = parser.parse_args()
    if opts.input_fp is None:
        parser.print_help()
        parser.error('Must specify an input file!')

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    table = parse_biom_table(biom_open(input_fp, 'U'))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
        compute_counts_per_sample_stats(table, opts.num_observations)
    num_observations = len(table.ObservationIds)
    suppress_md5 = opts.suppress_md5

    counts_per_sample_values = counts_per_sample.values()

    try:
        sample_md_keys = table.SampleMetadata[0].keys()
    except TypeError:
        sample_md_keys = ["None provided"]
    try:
        observation_md_keys = table.ObservationMetadata[0].keys()
    except TypeError:
        observation_md_keys = ["None provided"]

    lines = []

    num_samples = len(counts_per_sample)
    lines.append('Num samples: %s' % str(num_samples))
    lines.append('Num observations: %s' % str(num_observations))
    if not opts.num_observations:
        total_count = sum(counts_per_sample_values)
        lines.append('Total count: %s' % str(total_count))
        lines.append('Table density (fraction of non-zero values): %1.4f' %
                     table.getTableDensity())
    if not suppress_md5:
        lines.append('Table md5 (unzipped): %s' %
                     safe_md5(biom_open(input_fp, 'U')))
    lines.append('')

    if opts.num_observations:
        lines.append('Observations/sample summary:')
    else:
        lines.append('Counts/sample summary:')
    lines.append(' Min: %s' % str(min_counts))
    lines.append(' Max: %s' % str(max_counts))
    lines.append(' Median: %s' % str(median_counts))
    lines.append(' Mean: %s' % str(mean_counts))
    lines.append(' Std. dev.: %s' % str(std(counts_per_sample_values)))
    lines.append(' Sample Metadata Categories: %s' %
                 '; '.join(sample_md_keys))
    lines.append(' Observation Metadata Categories: %s' %
                 '; '.join(observation_md_keys))
    lines.append('')

    if opts.num_observations:
        lines.append('Observations/sample detail:')
    else:
        lines.append('Counts/sample detail:')
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    for v, k in sorted_counts_per_sample:
        lines.append(' %s: %s' % (k, str(v)))

    if output_fp is not None:
        open(output_fp, 'w').write('\n'.join(lines))
    else:
        print '\n'.join(lines)
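
# main() above relies on a module-level optparse parser that is not shown in
# this excerpt. The sketch below reconstructs the option definitions it
# implies. Only the destination names (input_fp, output_fp, num_observations,
# suppress_md5) come from the code; the flag spellings and help strings are
# assumptions for illustration.
from optparse import OptionParser

parser = OptionParser(usage='Summarize sample counts in a BIOM table.')
parser.add_option('-i', '--input_fp', dest='input_fp', default=None,
                  help='path to the BIOM table to summarize')
parser.add_option('-o', '--output_fp', dest='output_fp', default=None,
                  help='path to write the summary (prints to stdout if omitted)')
parser.add_option('--num_observations', dest='num_observations',
                  action='store_true', default=False,
                  help='report observations per sample instead of total counts')
parser.add_option('--suppress_md5', dest='suppress_md5',
                  action='store_true', default=False,
                  help='do not compute the md5 sum of the input file')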