def newick_replace_otuids(tree, biom):
    """
    Replace the OTU ids in the Newick phylogenetic tree format with truncated
    OTU names.

    :type tree: str
    :param tree: Newick-formatted tree text in which OTUs are labelled by id.
    :type biom: dict (translated json string)
    :param biom: BIOM-formatted data. Every entry of biom['rows'] must have an
                 'id' key; the row itself is passed to oc.otu_name_biom() to
                 obtain the replacement name.
    :rtype: str
    :return: The tree text with each located OTU id replaced by its OTU name.
             Ids that find_otu() cannot locate are reported on stdout and
             left untouched.
    """
    for row in biom['rows']:
        otu_id = row['id']
        # find_otu() is used here as returning the start index of the id
        # inside the tree text, or None when the id is absent.
        otu_loc = find_otu(otu_id, tree)
        if otu_loc is None:
            # A single pre-formatted argument prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            # %-formatting (rather than str.format) keeps unicode ids working
            # on Python 2.
            print('ID not found: %s' % (otu_id,))
            continue
        # Splice the name in place of the id. The tree is updated on every
        # iteration so later lookups see the already-renamed text.
        tree = (tree[:otu_loc] + oc.otu_name_biom(row) +
                tree[otu_loc + len(otu_id):])
    return tree
def test_otu_name_biom(self):
    """
    Check oc.otu_name_biom() against a manually determined OTU name.

    The function is applied to the fixture row stored at self.row[3] and the
    outcome is kept on self.result.

    :return: Returns OK if the generated name matches the expected one,
             otherwise raises an assertion error.
    """
    # Name identified by hand for the fixture row under test.
    expected_name = 'Halanaerobium_Halanaerobiumsaccharolyticum'

    self.result = oc.otu_name_biom(self.row[3])

    # The generated name must match the manually identified one exactly.
    failure_msg = ('Error! otu_name_biom() output does not match manually '
                   'identified name.')
    self.assertEqual(self.result, expected_name, msg=failure_msg)
def write_relative_abundance(rel_abd, biom, out_fn, sort_by=None):
    """
    Write already-computed per-sample relative abundances to a tab-separated
    file listing OTUs as rows and Samples as columns.

    :type rel_abd: dict
    :param rel_abd: Relative abundance values, looked up as
                    rel_abd[sample_id][otu_id]. Any Sample/OTU pair missing
                    from this mapping is written as '0'.
    :type biom: dict (translated json string)
    :param biom: BIOM-formatted OTU/Sample abundance data. biom['columns']
                 supplies the Sample IDs and biom['rows'] supplies the OTUs.
    :type out_fn: str
    :param out_fn: The full path to the desired output file.
    :type sort_by: function
    :param sort_by: A function acting as a sorting key that will determine
                    the order in which the Sample IDs appear as columns in
                    the output file.
    """
    with open(out_fn, 'w') as out_f:
        # De-duplicate the Sample IDs, then fix the column order.
        sample_ids = sorted({column['id'] for column in biom['columns']},
                            key=sort_by)
        header_cells = '\t'.join(sample_ids)
        out_f.write('#OTU ID\t{}\n'.format(header_cells))

        for otu_row in biom['rows']:
            otu_label = oc.otu_name_biom(otu_row)
            otu_id = otu_row['id']

            cells = []
            for sample_id in sample_ids:
                has_value = (sample_id in rel_abd and
                             otu_id in rel_abd[sample_id])
                if has_value:
                    cells.append(str(rel_abd[sample_id][otu_id]))
                else:
                    cells.append('0')

            out_f.write('{}\t{}\n'.format(otu_label, '\t'.join(cells)))
def main():
    """
    Command-line entry point: build iTOL input files from a BIOM table.

    Steps performed, in order:

    1. Parse the program options and verify that the OTU table and the
       mapping file can be opened; exit with an error message otherwise.
    2. Load the BIOM data (read from the first line of the OTU table file)
       and parse the mapping file.
    3. If an input tree was given, read its first line, strip single quotes,
       replace OTU ids with OTU names and write it to the output tree file.
    4. For every group returned by util.gather_categories(), compute the
       per-OTU results for the selected analysis metric ('MRA', 'NMRA' or
       'raw') and store them on the group keyed by OTU name.
    5. Write the iTOL data set file: a LABELS line, a COLORS line and one
       tab-separated row per OTU name with one value per group, formatted
       to five decimals. For 'NMRA' each row is normalized by its sum.

    Exits through sys.exit() when an input file cannot be opened or when the
    analysis metric is not one of the supported values.
    """
    args = handle_program_options()

    # Fail early, with a readable message, if a required input is unreadable.
    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError with OTU_Sample abundance data file:{}\n'
            .format(ioe)
        )

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError with mapping file:{}\n'
            .format(ioe)
        )

    # input data
    with open(args.otu_table) as bF:
        biom = json.loads(bF.readline())
    map_header, imap = util.parse_map_file(args.mapping)

    # rewrite tree file with otu names
    if args.input_tree:
        with open(args.input_tree) as treF, open(args.output_tre, 'w') as outF:
            # Single quotes are stripped before the ids are looked up.
            tree = treF.readline().replace("'", '')
            outF.write(newick_replace_otuids(tree, biom))

    oid_rows = {row['id']: row for row in biom['rows']}

    # calculate analysis results
    categories = None
    if args.map_categories is not None:
        categories = args.map_categories.split(',')

    # set transform if --stabilize_variance is specified
    tform = bc.arcsine_sqrt_transform if args.stabilize_variance else None

    groups = util.gather_categories(imap, map_header, categories)
    for group in groups.values():
        if args.analysis_metric in ['MRA', 'NMRA']:
            results = bc.MRA(biom, group.sids, transform=tform)
        elif args.analysis_metric == 'raw':
            results = bc.transform_raw_abundance(biom, sampleIDs=group.sids,
                                                 sample_abd=False)
        else:
            # Without this branch `results` would be unbound and the next
            # statement would die with an unhelpful NameError.
            sys.exit(
                '\nUnsupported analysis metric: {}\n'
                .format(args.analysis_metric)
            )
        group.results.update({oc.otu_name_biom(oid_rows[oid]): results[oid]
                              for oid in results})

    normalize = args.analysis_metric == 'NMRA'

    # write iTol data set file
    with open(args.output_itol_table, 'w') as itolF:
        itolF.write('LABELS\t' + '\t'.join(groups.keys()) + '\n')
        itolF.write('COLORS\t{}\n'.format('\t'.join(['#ff0000'] * len(groups))))
        all_otus = frozenset(oc.otu_name_biom(row) for row in biom['rows'])

        for oname in all_otus:
            # One value per group, in the same order as the LABELS line
            # (both iterate the same, unmodified `groups` mapping).
            values = [group.results[oname] if oname in group.results else 0.0
                      for group in groups.values()]

            # normalize avg relative abundance data
            msum = sum(values)
            if normalize and msum > 0:
                values = [value / msum for value in values]

            # Format each number directly. Group names come from the mapping
            # file, so using them as str.format field names would break on
            # names containing '.', '[', ':', '!' or braces, on purely
            # numeric names, and on a group called 'name'.
            cells = ['{:.5f}'.format(value) for value in values]
            itolF.write('\t'.join([oname] + cells) + '\n')