Ejemplo n.º 1
0
def newick_replace_otuids(tree, biom):
    """
    Replace the OTU ids in the Newick phylogenetic tree format with truncated
    OTU names.

    :type tree: str
    :param tree: Newick-formatted tree text in which the OTU ids appear.
    :type biom: dict (translated json string)
    :param biom: BIOM-formatted data; every entry of biom['rows'] must have
                 an 'id' key.
    :rtype: str
    :return: The tree text in which each id located by find_otu() has been
             replaced by the name oc.otu_name_biom() returns for its row.
             Ids that find_otu() reports as missing (None) are printed to
             stdout and the tree is left unchanged for them.
    """
    for row in biom['rows']:
        otu_id = row['id']
        # Search the current text: earlier substitutions shift later offsets.
        otu_loc = find_otu(otu_id, tree)
        if otu_loc is None:
            # A single parenthesised argument is valid as a Python 2 print
            # statement and as a Python 3 print() call, with the same output.
            print('ID not found: %s' % (otu_id,))
            continue
        id_end = otu_loc + len(otu_id)
        tree = tree[:otu_loc] + oc.otu_name_biom(row) + tree[id_end:]
    return tree
Ejemplo n.º 2
0
    def test_otu_name_biom(self):
        """
        Check otu_name_biom() of otu_calc.py against a hand-derived name.

        :return: Nothing. The test passes when the name computed for
                 self.row[3] equals the manually identified one; otherwise
                 an assertion error is raised.
        """
        expected_name = 'Halanaerobium_Halanaerobiumsaccharolyticum'
        failure_msg = ('Error! otu_name_biom() output does not match '
                       'manually identified name.')

        # The computed name is kept on the instance, as the test stores it.
        self.result = oc.otu_name_biom(self.row[3])

        self.assertEqual(self.result, expected_name, msg=failure_msg)
def write_relative_abundance(rel_abd, biom, out_fn, sort_by=None):
    """
    Write per-sample relative abundance values to a tab-separated file
    listing OTUs as rows and Samples as columns.

    The first line is '#OTU ID' followed by the Sample IDs. Every further
    line starts with the name oc.otu_name_biom() gives for the OTU row and
    holds one value per Sample; '0' is written wherever rel_abd has no value
    for that Sample/OTU pair.

    :type rel_abd: dict
    :param rel_abd: Abundance values, looked up as rel_abd[sample_id][otu_id].
    :type biom: dict (translated json string)
    :param biom: BIOM-formatted OTU/Sample abundance data. The Sample IDs
                 come from biom['columns'], the OTUs from biom['rows'].
    :type out_fn: str
    :param out_fn: The full path to the desired output file.
    :type sort_by: function
    :param sort_by: A function acting as a sorting key that will determine
                     the order in which the Sample IDs appear as columns in
                     the output file. None keeps the natural sort order.
    """
    header_template = '#OTU ID\t{}\n'
    line_template = '{}\t{}\n'

    with open(out_fn, 'w') as out_f:
        # Duplicate Sample IDs collapse into a single column.
        unique_sids = {column['id'] for column in biom['columns']}
        sids = sorted(unique_sids, key=sort_by)
        out_f.write(header_template.format('\t'.join(sids)))

        for row in biom['rows']:
            otu_label = oc.otu_name_biom(row)
            otu_id = row['id']

            cells = []
            for sid in sids:
                if sid in rel_abd and otu_id in rel_abd[sid]:
                    cells.append(str(rel_abd[sid][otu_id]))
                else:
                    # No measurement for this Sample/OTU pair.
                    cells.append('0')

            out_f.write(line_template.format(otu_label, '\t'.join(cells)))
Ejemplo n.º 4
0
def main():
    """
    Build an iTol data set file, and optionally a relabelled tree, from a
    BIOM table and a mapping file.

    Steps, in order:
      1. Read the program options and check that the OTU table and the
         mapping file can be opened.
      2. Load the BIOM table (JSON) and parse the mapping file.
      3. If args.input_tree is set, write to args.output_tre a copy of the
         tree whose OTU ids are replaced by OTU names.
      4. For every group returned by util.gather_categories(), compute the
         abundance results selected by args.analysis_metric ('MRA', 'NMRA'
         or 'raw') and store them on the group keyed by OTU name.
      5. Write args.output_itol_table: a LABELS line, a COLORS line and one
         line per distinct OTU name holding one value per group. For 'NMRA'
         the values of a line are divided by their sum when it is positive.

    Exits through sys.exit() with a message when an input file cannot be
    opened or when args.analysis_metric is not one of the supported values.
    """
    args = handle_program_options()

    # Fail early with a readable message if a required input is unreadable.
    required_inputs = (
        (args.otu_table, 'OTU_Sample abundance data'),
        (args.mapping, 'mapping'),
    )
    for path, label in required_inputs:
        try:
            with open(path):
                pass
        except IOError as ioe:
            sys.exit('\nError with {} file:{}\n'.format(label, ioe))

    # input data
    with open(args.otu_table) as bF:
        # json.load() reads the whole file, so a table whose JSON spans
        # several lines is parsed as well as a single-line one.
        biom = json.load(bF)
    map_header, imap = util.parse_map_file(args.mapping)

    # rewrite tree file with otu names
    if args.input_tree:
        with open(args.input_tree) as treF, open(args.output_tre, 'w') as outF:
            # Only the first line of the tree file is used; single quotes
            # are removed before the ids are substituted.
            tree = treF.readline().replace("'", '')
            outF.write(newick_replace_otuids(tree, biom))

    oid_rows = {row['id']: row for row in biom['rows']}

    # calculate analysis results
    categories = None
    if args.map_categories is not None:
        categories = args.map_categories.split(',')

    # set transform if --stabilize_variance is specified
    tform = bc.arcsine_sqrt_transform if args.stabilize_variance else None

    groups = util.gather_categories(imap, map_header, categories)
    for group in groups.values():
        if args.analysis_metric in ('MRA', 'NMRA'):
            results = bc.MRA(biom, group.sids, transform=tform)
        elif args.analysis_metric == 'raw':
            results = bc.transform_raw_abundance(biom, sampleIDs=group.sids,
                                                 sample_abd=False)
        else:
            # Without this branch `results` would be unbound (NameError).
            sys.exit('\nError: unsupported analysis metric:{}\n'
                     .format(args.analysis_metric))

        # NOTE(review): OTU ids that share a name overwrite one another
        # here -- confirm whether their values should be combined instead.
        group.results.update({oc.otu_name_biom(oid_rows[oid]): results[oid]
                              for oid in results})

    # One fixed ordering of the groups, shared by the header and every row.
    group_names = list(groups)

    # write iTol data set file
    with open(args.output_itol_table, 'w') as itolF:
        itolF.write('LABELS\t' + '\t'.join(group_names) + '\n')
        itolF.write('COLORS\t{}\n'.format(
            '\t'.join(['#ff0000'] * len(group_names))))

        # Emit each OTU name once, in order of first appearance in the
        # table, so the output is reproducible between runs.
        written = set()
        for biom_row in biom['rows']:
            oname = oc.otu_name_biom(biom_row)
            if oname in written:
                continue
            written.add(oname)

            values = []
            for name in group_names:
                group_results = groups[name].results
                if oname in group_results:
                    values.append(group_results[oname])
                else:
                    values.append(0.0)
            msum = sum(values)

            # normalize avg relative abundance data
            if args.analysis_metric == 'NMRA' and msum > 0:
                values = [value / msum for value in values]

            # Values are formatted directly rather than through a template
            # keyed by group name: a name that is numeric, contains
            # '.', '[', ':', '!' or braces, or equals 'name' would be
            # misread as a format field.
            cells = ['{}'.format(oname)]
            cells.extend('{:.5f}'.format(value) for value in values)
            itolF.write('\t'.join(cells) + '\n')